1748 files changed, 87888 insertions, 11630 deletions
diff --git a/.mailmap b/.mailmap
index 43031441b2d9..19eb49e55836 100644
--- a/.mailmap
+++ b/.mailmap
@@ -117,6 +117,7 @@ Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
 Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
 Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
 Chao Yu <chao@kernel.org> <yuchao0@huawei.com>
+Chester Lin <chester62515@gmail.com> <clin@suse.com>
 Chris Chiu <chris.chiu@canonical.com> <chiu@endlessm.com>
 Chris Chiu <chris.chiu@canonical.com> <chiu@endlessos.org>
 Chris Lew <quic_clew@quicinc.com> <clew@codeaurora.org>
diff --git a/CREDITS b/CREDITS
index f33a33fd2371..81845c39e3cf 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2944,6 +2944,14 @@ D: IPX development and support
 N: Venkatesh Pallipadi (Venki)
 D: x86/HPET
 
+N: Antti Palosaari
+E: crope@iki.fi
+D: Various DVB drivers
+W: https://palosaari.fi/linux/
+S: Yliopistokatu 1 D 513
+S: FI-90570 Oulu
+S: FINLAND
+
 N: Kyungmin Park
 E: kyungmin.park@samsung.com
 D: Samsung S5Pv210 and Exynos4210 mobile platforms
diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index 042fd125fbc9..a7a432dc4015 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -1,4 +1,4 @@
-What:           /sys/kernel/debug/accel/<n>/addr
+What:           /sys/kernel/debug/accel/<parent_device>/addr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -8,34 +8,34 @@ Description:    Sets the device address to be used for read or write through
                 only when the IOMMU is disabled.
                 The acceptable value is a string that starts with "0x"
 
-What:           /sys/kernel/debug/accel/<n>/clk_gate
+What:           /sys/kernel/debug/accel/<parent_device>/clk_gate
 Date:           May 2020
 KernelVersion:  5.8
 Contact:        ogabbay@kernel.org
 Description:    This setting is now deprecated as clock gating is handled solely by the f/w
 
-What:           /sys/kernel/debug/accel/<n>/command_buffers
+What:           /sys/kernel/debug/accel/<parent_device>/command_buffers
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays a list with information about the currently allocated
                 command buffers
 
-What:           /sys/kernel/debug/accel/<n>/command_submission
+What:           /sys/kernel/debug/accel/<parent_device>/command_submission
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays a list with information about the currently active
                 command submissions
 
-What:           /sys/kernel/debug/accel/<n>/command_submission_jobs
+What:           /sys/kernel/debug/accel/<parent_device>/command_submission_jobs
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays a list with detailed information about each JOB (CB) of
                 each active command submission
 
-What:           /sys/kernel/debug/accel/<n>/data32
+What:           /sys/kernel/debug/accel/<parent_device>/data32
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -50,7 +50,7 @@ Description:    Allows the root user to read or write directly through the
                 If the IOMMU is disabled, it also allows the root user to read
                 or write from the host a device VA of a host mapped memory
 
-What:           /sys/kernel/debug/accel/<n>/data64
+What:           /sys/kernel/debug/accel/<parent_device>/data64
 Date:           Jan 2020
 KernelVersion:  5.6
 Contact:        ogabbay@kernel.org
@@ -65,7 +65,7 @@ Description:    Allows the root user to read or write 64 bit data directly
                 If the IOMMU is disabled, it also allows the root user to read
                 or write from the host a device VA of a host mapped memory
 
-What:           /sys/kernel/debug/accel/<n>/data_dma
+What:           /sys/kernel/debug/accel/<parent_device>/data_dma
 Date:           Apr 2021
 KernelVersion:  5.13
 Contact:        ogabbay@kernel.org
@@ -83,7 +83,7 @@ Description:    Allows the root user to read from the device's internal
                 workloads.
                 Only supported on GAUDI at this stage.
 
-What:           /sys/kernel/debug/accel/<n>/device
+What:           /sys/kernel/debug/accel/<parent_device>/device
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -91,14 +91,14 @@ Description:    Enables the root user to set the device to specific state.
                 Valid values are "disable", "enable", "suspend", "resume".
                 User can read this property to see the valid values
 
-What:           /sys/kernel/debug/accel/<n>/device_release_watchdog_timeout
+What:           /sys/kernel/debug/accel/<parent_device>/device_release_watchdog_timeout
 Date:           Oct 2022
 KernelVersion:  6.2
 Contact:        ttayar@habana.ai
 Description:    The watchdog timeout value in seconds for a device release upon
                 certain error cases, after which the device is reset.
 
-What:           /sys/kernel/debug/accel/<n>/dma_size
+What:           /sys/kernel/debug/accel/<parent_device>/dma_size
 Date:           Apr 2021
 KernelVersion:  5.13
 Contact:        ogabbay@kernel.org
@@ -108,7 +108,7 @@ Description:    Specify the size of the DMA transaction when using DMA to read
                 When the write is finished, the user can read the "data_dma"
                 blob
 
-What:           /sys/kernel/debug/accel/<n>/dump_razwi_events
+What:           /sys/kernel/debug/accel/<parent_device>/dump_razwi_events
 Date:           Aug 2022
 KernelVersion:  5.20
 Contact:        fkassabri@habana.ai
@@ -117,7 +117,7 @@ Description:    Dumps all razwi events to dmesg if exist.
                 the routine will clear the status register.
                 Usage: cat dump_razwi_events
 
-What:           /sys/kernel/debug/accel/<n>/dump_security_violations
+What:           /sys/kernel/debug/accel/<parent_device>/dump_security_violations
 Date:           Jan 2021
 KernelVersion:  5.12
 Contact:        ogabbay@kernel.org
@@ -125,14 +125,14 @@ Description:    Dumps all security violations to dmesg. This will also ack
                 all security violations meanings those violations will not be
                 dumped next time user calls this API
 
-What:           /sys/kernel/debug/accel/<n>/engines
+What:           /sys/kernel/debug/accel/<parent_device>/engines
 Date:           Jul 2019
 KernelVersion:  5.3
 Contact:        ogabbay@kernel.org
 Description:    Displays the status registers values of the device engines and
                 their derived idle status
 
-What:           /sys/kernel/debug/accel/<n>/i2c_addr
+What:           /sys/kernel/debug/accel/<parent_device>/i2c_addr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -140,7 +140,7 @@ Description:    Sets I2C device address for I2C transaction that is generated
                 by the device's CPU, Not available when device is loaded with secured
                 firmware
 
-What:           /sys/kernel/debug/accel/<n>/i2c_bus
+What:           /sys/kernel/debug/accel/<parent_device>/i2c_bus
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -148,7 +148,7 @@ Description:    Sets I2C bus address for I2C transaction that is generated by
                 the device's CPU, Not available when device is loaded with secured
                 firmware
 
-What:           /sys/kernel/debug/accel/<n>/i2c_data
+What:           /sys/kernel/debug/accel/<parent_device>/i2c_data
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -157,7 +157,7 @@ Description:    Triggers an I2C transaction that is generated by the device's
                 reading from the file generates a read transaction, Not available
                 when device is loaded with secured firmware
 
-What:           /sys/kernel/debug/accel/<n>/i2c_len
+What:           /sys/kernel/debug/accel/<parent_device>/i2c_len
 Date:           Dec 2021
 KernelVersion:  5.17
 Contact:        obitton@habana.ai
@@ -165,7 +165,7 @@ Description:    Sets I2C length in bytes for I2C transaction that is generated b
                 the device's CPU, Not available when device is loaded with secured
                 firmware
 
-What:           /sys/kernel/debug/accel/<n>/i2c_reg
+What:           /sys/kernel/debug/accel/<parent_device>/i2c_reg
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -173,35 +173,35 @@ Description:    Sets I2C register id for I2C transaction that is generated by
                 the device's CPU, Not available when device is loaded with secured
                 firmware
 
-What:           /sys/kernel/debug/accel/<n>/led0
+What:           /sys/kernel/debug/accel/<parent_device>/led0
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets the state of the first S/W led on the device, Not available
                 when device is loaded with secured firmware
 
-What:           /sys/kernel/debug/accel/<n>/led1
+What:           /sys/kernel/debug/accel/<parent_device>/led1
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets the state of the second S/W led on the device, Not available
                 when device is loaded with secured firmware
 
-What:           /sys/kernel/debug/accel/<n>/led2
+What:           /sys/kernel/debug/accel/<parent_device>/led2
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets the state of the third S/W led on the device, Not available
                 when device is loaded with secured firmware
 
-What:           /sys/kernel/debug/accel/<n>/memory_scrub
+What:           /sys/kernel/debug/accel/<parent_device>/memory_scrub
 Date:           May 2022
 KernelVersion:  5.19
 Contact:        dhirschfeld@habana.ai
 Description:    Allows the root user to scrub the dram memory. The scrubbing
                 value can be set using the debugfs file memory_scrub_val.
 
-What:           /sys/kernel/debug/accel/<n>/memory_scrub_val
+What:           /sys/kernel/debug/accel/<parent_device>/memory_scrub_val
 Date:           May 2022
 KernelVersion:  5.19
 Contact:        dhirschfeld@habana.ai
@@ -209,7 +209,7 @@ Description:    The value to which the dram will be set to when the user
                 scrubs the dram using 'memory_scrub' debugfs file and
                 the scrubbing value when using module param 'memory_scrub'
 
-What:           /sys/kernel/debug/accel/<n>/mmu
+What:           /sys/kernel/debug/accel/<parent_device>/mmu
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -219,7 +219,7 @@ Description:    Displays the hop values and physical address for a given ASID
                 e.g. to display info about VA 0x1000 for ASID 1 you need to do:
                 echo "1 0x1000" > /sys/kernel/debug/accel/0/mmu
 
-What:           /sys/kernel/debug/accel/<n>/mmu_error
+What:           /sys/kernel/debug/accel/<parent_device>/mmu_error
 Date:           Mar 2021
 KernelVersion:  5.12
 Contact:        fkassabri@habana.ai
@@ -229,7 +229,7 @@ Description:    Check and display page fault or access violation mmu errors for
                 echo "0x200" > /sys/kernel/debug/accel/0/mmu_error
                 cat /sys/kernel/debug/accel/0/mmu_error
 
-What:           /sys/kernel/debug/accel/<n>/monitor_dump
+What:           /sys/kernel/debug/accel/<parent_device>/monitor_dump
 Date:           Mar 2022
 KernelVersion:  5.19
 Contact:        osharabi@habana.ai
@@ -243,7 +243,7 @@ Description:    Allows the root user to dump monitors status from the device's
                 This interface doesn't support concurrency in the same device.
                 Only supported on GAUDI.
 
-What:           /sys/kernel/debug/accel/<n>/monitor_dump_trig
+What:           /sys/kernel/debug/accel/<parent_device>/monitor_dump_trig
 Date:           Mar 2022
 KernelVersion:  5.19
 Contact:        osharabi@habana.ai
@@ -253,14 +253,14 @@ Description:    Triggers dump of monitor data. The value to trigger the operatio
                 When the write is finished, the user can read the "monitor_dump"
                 blob
 
-What:           /sys/kernel/debug/accel/<n>/set_power_state
+What:           /sys/kernel/debug/accel/<parent_device>/set_power_state
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets the PCI power state. Valid values are "1" for D0 and "2"
                 for D3Hot
 
-What:           /sys/kernel/debug/accel/<n>/skip_reset_on_timeout
+What:           /sys/kernel/debug/accel/<parent_device>/skip_reset_on_timeout
 Date:           Jun 2021
 KernelVersion:  5.13
 Contact:        ynudelman@habana.ai
@@ -268,7 +268,7 @@ Description:    Sets the skip reset on timeout option for the device. Value of
                 "0" means device will be reset in case some CS has timed out,
                 otherwise it will not be reset.
 
-What:           /sys/kernel/debug/accel/<n>/state_dump
+What:           /sys/kernel/debug/accel/<parent_device>/state_dump
 Date:           Oct 2021
 KernelVersion:  5.15
 Contact:        ynudelman@habana.ai
@@ -279,7 +279,7 @@ Description:    Gets the state dump occurring on a CS timeout or failure.
                 Writing an integer X discards X state dumps, so that the
                 next read would return X+1-st newest state dump.
 
-What:           /sys/kernel/debug/accel/<n>/stop_on_err
+What:           /sys/kernel/debug/accel/<parent_device>/stop_on_err
 Date:           Mar 2020
 KernelVersion:  5.6
 Contact:        ogabbay@kernel.org
@@ -287,13 +287,13 @@ Description:    Sets the stop-on_error option for the device engines. Value of
                 "0" is for disable, otherwise enable.
                 Relevant only for GOYA and GAUDI.
 
-What:           /sys/kernel/debug/accel/<n>/timeout_locked
+What:           /sys/kernel/debug/accel/<parent_device>/timeout_locked
 Date:           Sep 2021
 KernelVersion:  5.16
 Contact:        obitton@habana.ai
 Description:    Sets the command submission timeout value in seconds.
 
-What:           /sys/kernel/debug/accel/<n>/userptr
+What:           /sys/kernel/debug/accel/<parent_device>/userptr
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
@@ -301,7 +301,7 @@ Description:    Displays a list with information about the current user
                 pointers (user virtual addresses) that are pinned and mapped
                 to DMA addresses
 
-What:           /sys/kernel/debug/accel/<n>/userptr_lookup
+What:           /sys/kernel/debug/accel/<parent_device>/userptr_lookup
 Date:           Oct 2021
 KernelVersion:  5.15
 Contact:        ogabbay@kernel.org
@@ -309,7 +309,7 @@ Description:    Allows to search for specific user pointers (user virtual
                 addresses) that are pinned and mapped to DMA addresses, and see
                 their resolution to the specific dma address.
 
-What:           /sys/kernel/debug/accel/<n>/vm
+What:           /sys/kernel/debug/accel/<parent_device>/vm
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
diff --git a/Documentation/ABI/testing/sysfs-bus-optee-devices b/Documentation/ABI/testing/sysfs-bus-optee-devices
index 0f58701367b6..af31e5a22d89 100644
--- a/Documentation/ABI/testing/sysfs-bus-optee-devices
+++ b/Documentation/ABI/testing/sysfs-bus-optee-devices
@@ -6,3 +6,12 @@ Description:
 		OP-TEE bus provides reference to registered drivers under this directory. The <uuid>
 		matches Trusted Application (TA) driver and corresponding TA in secure OS. Drivers
 		are free to create needed API under optee-ta-<uuid> directory.
+
+What:		/sys/bus/tee/devices/optee-ta-<uuid>/need_supplicant
+Date:		November 2023
+KernelVersion:	6.7
+Contact:	op-tee@lists.trustedfirmware.org
+Description:
+		Allows to distinguish whether an OP-TEE based TA/device requires user-space
+		tee-supplicant to function properly or not. This attribute will be present for
+		devices which depend on tee-supplicant to be running.
diff --git a/Documentation/ABI/testing/sysfs-class-led b/Documentation/ABI/testing/sysfs-class-led
index b2ff0012c0f2..2e24ac3bd7ef 100644
--- a/Documentation/ABI/testing/sysfs-class-led
+++ b/Documentation/ABI/testing/sysfs-class-led
@@ -59,15 +59,6 @@ Description:
 		brightness. Reading this file when no hw brightness change
 		event has happened will return an ENODATA error.
 
-What:		/sys/class/leds/<led>/color
-Date:		June 2023
-KernelVersion:	6.5
-Description:
-		Color of the LED.
-
-		This is a read-only file. Reading this file returns the color
-		of the LED as a string (e.g: "red", "green", "multicolor").
-
 What:		/sys/class/leds/<led>/trigger
 Date:		March 2006
 KernelVersion:	2.6.17
diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs
index c63ca1ad500d..4244f5af4b54 100644
--- a/Documentation/ABI/testing/sysfs-driver-habanalabs
+++ b/Documentation/ABI/testing/sysfs-driver-habanalabs
@@ -149,6 +149,18 @@ Contact:        ogabbay@kernel.org
 Description:    Displays the current clock frequency, in Hz, of the MME compute
                 engine. This property is valid only for the Goya ASIC family
 
+What:           /sys/class/accel/accel<n>/device/module_id
+Date:           Nov 2023
+KernelVersion:  not yet upstreamed
+Contact:        ogabbay@kernel.org
+Description:    Displays the device's module id
+
+What:           /sys/class/accel/accel<n>/device/parent_device
+Date:           Nov 2023
+KernelVersion:  6.8
+Contact:        ttayar@habana.ai
+Description:    Displays the name of the parent device of the accel device
+
 What:           /sys/class/accel/accel<n>/device/pci_addr
 Date:           Jan 2019
 KernelVersion:  5.1
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
new file mode 100644
index 000000000000..8c321bc9dc04
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
@@ -0,0 +1,70 @@
+What:		/sys/devices/.../hwmon/hwmon<i>/power1_max
+Date:		September 2023
+KernelVersion:	6.5
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RW. Card reactive sustained  (PL1) power limit in microwatts.
+
+		The power controller will throttle the operating frequency
+		if the power averaged over a window (typically seconds)
+		exceeds this limit. A read value of 0 means that the PL1
+		power limit is disabled, writing 0 disables the
+		limit. Writing values > 0 and <= TDP will enable the power limit.
+
+		Only supported for particular Intel xe graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/power1_rated_max
+Date:		September 2023
+KernelVersion:	6.5
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RO. Card default power limit (default TDP setting).
+
+		Only supported for particular Intel xe graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/power1_crit
+Date:		September 2023
+KernelVersion:	6.5
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RW. Card reactive critical (I1) power limit in microwatts.
+
+		Card reactive critical (I1) power limit in microwatts is exposed
+		for client products. The power controller will throttle the
+		operating frequency if the power averaged over a window exceeds
+		this limit.
+
+		Only supported for particular Intel xe graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/curr1_crit
+Date:		September 2023
+KernelVersion:	6.5
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RW. Card reactive critical (I1) power limit in milliamperes.
+
+		Card reactive critical (I1) power limit in milliamperes is
+		exposed for server products. The power controller will throttle
+		the operating frequency if the power averaged over a window
+		exceeds this limit.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/in0_input
+Date:		September 2023
+KernelVersion:	6.5
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RO. Current Voltage in millivolt.
+
+		Only supported for particular Intel xe graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/energy1_input
+Date:		September 2023
+KernelVersion:	6.5
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RO. Energy input of device in microjoules.
+
+		Only supported for particular Intel xe graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/power1_max_interval
+Date:		October 2023
+KernelVersion:	6.6
+Contact:	intel-xe@lists.freedesktop.org
+Description:	RW. Sustained power limit interval (Tau in PL1/Tau) in
+		milliseconds over which sustained power is averaged.
+
+		Only supported for particular Intel xe graphics platforms.
diff --git a/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml b/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml
index 987aa83c2649..df20a3c9c744 100644
--- a/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml
@@ -9,6 +9,9 @@ title: Analog Devices ADV7533/35 HDMI Encoders
 maintainers:
   - Laurent Pinchart <laurent.pinchart@ideasonboard.com>
 
+allOf:
+  - $ref: /schemas/sound/dai-common.yaml#
+
 description: |
   The ADV7533 and ADV7535 are HDMI audio and video transmitters
   compatible with HDMI 1.4 and DVI 1.0. They support color space
@@ -89,6 +92,9 @@ properties:
     $ref: /schemas/types.yaml#/definitions/uint32
     enum: [ 1, 2, 3, 4 ]
 
+  "#sound-dai-cells":
+    const: 0
+
   ports:
     description:
       The ADV7533/35 has two video ports and one audio port.
diff --git a/Documentation/devicetree/bindings/display/fsl,lcdif.yaml b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml
index fc11ab5fc465..1c2be8d6f633 100644
--- a/Documentation/devicetree/bindings/display/fsl,lcdif.yaml
+++ b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml
@@ -51,7 +51,10 @@ properties:
     minItems: 1
 
   interrupts:
-    maxItems: 1
+    items:
+      - description: LCDIF DMA interrupt
+      - description: LCDIF Error interrupt
+    minItems: 1
 
   power-domains:
     maxItems: 1
@@ -131,6 +134,21 @@ allOf:
     then:
       required:
         - power-domains
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - fsl,imx23-lcdif
+    then:
+      properties:
+        interrupts:
+          minItems: 2
+          maxItems: 2
+    else:
+      properties:
+        interrupts:
+          maxItems: 1
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml
index 537e5304b730..ed24b617090b 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml
@@ -10,7 +10,6 @@ maintainers:
   - Chun-Kuang Hu <chunkuang.hu@kernel.org>
   - Philipp Zabel <p.zabel@pengutronix.de>
   - Jitao Shi <jitao.shi@mediatek.com>
-  - Xinlei Lee <xinlei.lee@mediatek.com>
 
 description: |
   The MediaTek DSI function block is a sink of the display subsystem and can
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.yaml
index 801fa66ae615..677882348ede 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.yaml
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.yaml
@@ -23,7 +23,11 @@ description:
 
 properties:
   compatible:
-    const: mediatek,mt8195-disp-ethdr
+    oneOf:
+      - const: mediatek,mt8195-disp-ethdr
+      - items:
+          - const: mediatek,mt8188-disp-ethdr
+          - const: mediatek,mt8195-disp-ethdr
 
   reg:
     maxItems: 7
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp-rdma.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp-rdma.yaml
index dd12e2ff685c..7570a0684967 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp-rdma.yaml
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp-rdma.yaml
@@ -21,7 +21,11 @@ description:
 
 properties:
   compatible:
-    const: mediatek,mt8195-vdo1-rdma
+    oneOf:
+      - const: mediatek,mt8195-vdo1-rdma
+      - items:
+          - const: mediatek,mt8188-vdo1-rdma
+          - const: mediatek,mt8195-vdo1-rdma
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,merge.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,merge.yaml
index eead5cb8636e..5c678695162e 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,merge.yaml
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,merge.yaml
@@ -27,6 +27,9 @@ properties:
       - items:
           - const: mediatek,mt6795-disp-merge
           - const: mediatek,mt8173-disp-merge
+      - items:
+          - const: mediatek,mt8188-disp-merge
+          - const: mediatek,mt8195-disp-merge
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,padding.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,padding.yaml
new file mode 100644
index 000000000000..6bad7dc2d69f
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,padding.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/mediatek/mediatek,padding.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Display Padding
+
+maintainers:
+  - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+  - Philipp Zabel <p.zabel@pengutronix.de>
+
+description:
+  Padding provides ability to add pixels to width and height of a layer with
+  specified colors. Due to hardware design, Mixer in VDOSYS1 requires
+  width of a layer to be 2-pixel-align, or 4-pixel-align when ETHDR is enabled,
+  we need Padding to deal with odd width.
+  Please notice that even if the Padding is in bypass mode, settings in
+  register must be cleared to 0, or undefined behaviors could happen.
+
+properties:
+  compatible:
+    const: mediatek,mt8188-disp-padding
+
+  reg:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Padding's clocks
+
+  mediatek,gce-client-reg:
+    description:
+      GCE (Global Command Engine) is a multi-core micro processor that helps
+      its clients to execute commands without interrupting CPU. This property
+      describes GCE client's information that is composed by 4 fields.
+      1. Phandle of the GCE (there may be several GCE processors)
+      2. Sub-system ID defined in the dt-binding like a user ID
+         (Please refer to include/dt-bindings/gce/<chip>-gce.h)
+      3. Offset from base address of the subsys you are at
+      4. Size of the register the client needs
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    items:
+      items:
+        - description: Phandle of the GCE
+        - description: Subsys ID defined in the dt-binding
+        - description: Offset from base address of the subsys
+        - description: Size of register
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - power-domains
+  - clocks
+  - mediatek,gce-client-reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/mediatek,mt8188-clk.h>
+    #include <dt-bindings/power/mediatek,mt8188-power.h>
+    #include <dt-bindings/gce/mt8195-gce.h>
+
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        padding0: padding@1c11d000 {
+            compatible = "mediatek,mt8188-disp-padding";
+            reg = <0 0x1c11d000 0 0x1000>;
+            clocks = <&vdosys1 CLK_VDO1_PADDING0>;
+            power-domains = <&spm MT8188_POWER_DOMAIN_VDOSYS1>;
+            mediatek,gce-client-reg = <&gce0 SUBSYS_1c11XXXX 0xd000 0x1000>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
index dbe398f84ffb..ae53cbfb2193 100644
--- a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
@@ -26,8 +26,10 @@ properties:
           - qcom,sc8280xp-edp
           - qcom,sdm845-dp
           - qcom,sm8350-dp
+          - qcom,sm8650-dp
       - items:
           - enum:
+              - qcom,sm8150-dp
               - qcom,sm8250-dp
               - qcom,sm8450-dp
               - qcom,sm8550-dp
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
index c6dbab65d5f7..4219936eda5a 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
@@ -25,6 +25,7 @@ properties:
               - qcom,sc7180-dsi-ctrl
               - qcom,sc7280-dsi-ctrl
               - qcom,sdm660-dsi-ctrl
+              - qcom,sdm670-dsi-ctrl
               - qcom,sdm845-dsi-ctrl
               - qcom,sm6115-dsi-ctrl
               - qcom,sm6125-dsi-ctrl
@@ -35,6 +36,7 @@ properties:
               - qcom,sm8350-dsi-ctrl
               - qcom,sm8450-dsi-ctrl
               - qcom,sm8550-dsi-ctrl
+              - qcom,sm8650-dsi-ctrl
           - const: qcom,mdss-dsi-ctrl
       - enum:
           - qcom,dsi-ctrl-6g-qcm2290
@@ -333,6 +335,7 @@ allOf:
               - qcom,sm8350-dsi-ctrl
               - qcom,sm8450-dsi-ctrl
               - qcom,sm8550-dsi-ctrl
+              - qcom,sm8650-dsi-ctrl
     then:
       properties:
         clocks:
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml
index dd6619555a12..7e764eac3ef3 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml
@@ -22,6 +22,7 @@ properties:
       - qcom,sm8350-dsi-phy-5nm
       - qcom,sm8450-dsi-phy-5nm
       - qcom,sm8550-dsi-phy-4nm
+      - qcom,sm8650-dsi-phy-4nm
 
   reg:
     items:
diff --git a/Documentation/devicetree/bindings/display/msm/mdss-common.yaml b/Documentation/devicetree/bindings/display/msm/mdss-common.yaml
index f69196e4cc76..c6305a6e0334 100644
--- a/Documentation/devicetree/bindings/display/msm/mdss-common.yaml
+++ b/Documentation/devicetree/bindings/display/msm/mdss-common.yaml
@@ -61,17 +61,27 @@ properties:
 
   ranges: true
 
+  # This is not a perfect description, but it's impossible to discern and match
+  # the entries like we do with interconnect-names
   interconnects:
     minItems: 1
     items:
       - description: Interconnect path from mdp0 (or a single mdp) port to the data bus
       - description: Interconnect path from mdp1 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    minItems: 1
-    items:
-      - const: mdp0-mem
-      - const: mdp1-mem
+    oneOf:
+      - minItems: 1
+        items:
+          - const: mdp0-mem
+          - const: cpu-cfg
+
+      - minItems: 2
+        items:
+          - const: mdp0-mem
+          - const: mdp1-mem
+          - const: cpu-cfg
 
   resets:
     items:
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml
index 5ad155612b6c..f0cdb5422688 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml
@@ -36,10 +36,14 @@ properties:
     maxItems: 2
 
   interconnects:
-    maxItems: 1
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    maxItems: 1
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
@@ -56,7 +60,9 @@ patternProperties:
 
     properties:
       compatible:
-        const: qcom,dsi-ctrl-6g-qcm2290
+        items:
+          - const: qcom,qcm2290-dsi-ctrl
+          - const: qcom,mdss-dsi-ctrl
 
   "^phy@[0-9a-f]+$":
     type: object
@@ -96,8 +102,10 @@ examples:
         interrupt-controller;
         #interrupt-cells = <1>;
 
-        interconnects = <&mmrt_virt MASTER_MDP0 &bimc SLAVE_EBI1>;
-        interconnect-names = "mdp0-mem";
+        interconnects = <&mmrt_virt MASTER_MDP0 &bimc SLAVE_EBI1>,
+                        <&bimc MASTER_APPSS_PROC &config_noc SLAVE_DISPLAY_CFG>;
+        interconnect-names = "mdp0-mem",
+                             "cpu-cfg";
 
         iommus = <&apps_smmu 0x420 0x2>,
                  <&apps_smmu 0x421 0x0>;
@@ -136,7 +144,8 @@ examples:
         };
 
         dsi@5e94000 {
-            compatible = "qcom,dsi-ctrl-6g-qcm2290";
+            compatible = "qcom,qcm2290-dsi-ctrl",
+                         "qcom,mdss-dsi-ctrl";
             reg = <0x05e94000 0x400>;
             reg-names = "dsi_ctrl";
 
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sc7180-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sc7180-mdss.yaml
index 3432a2407caa..7a0555b15ddf 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sc7180-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sc7180-mdss.yaml
@@ -36,10 +36,14 @@ properties:
     maxItems: 1
 
   interconnects:
-    maxItems: 1
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    maxItems: 1
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
@@ -106,8 +110,10 @@ examples:
         interrupt-controller;
         #interrupt-cells = <1>;
 
-        interconnects = <&mmss_noc MASTER_MDP0 &mc_virt SLAVE_EBI1>;
-        interconnect-names = "mdp0-mem";
+        interconnects = <&mmss_noc MASTER_MDP0 &mc_virt SLAVE_EBI1>,
+                        <&gem_noc MASTER_APPSS_PROC &config_noc SLAVE_DISPLAY_CFG>;
+        interconnect-names = "mdp0-mem",
+                             "cpu-cfg";
 
         iommus = <&apps_smmu 0x800 0x2>;
         ranges;
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml
index bbb727831fca..2947f27e0585 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml
@@ -36,10 +36,14 @@ properties:
     maxItems: 1
 
   interconnects:
-    maxItems: 1
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    maxItems: 1
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
@@ -118,8 +122,10 @@ examples:
         interrupt-controller;
         #interrupt-cells = <1>;
 
-        interconnects = <&mmss_noc MASTER_MDP0 &mc_virt SLAVE_EBI1>;
-        interconnect-names = "mdp0-mem";
+        interconnects = <&mmss_noc MASTER_MDP0 &mc_virt SLAVE_EBI1>,
+                        <&gem_noc MASTER_APPSS_PROC &cnoc2 SLAVE_DISPLAY_CFG>;
+        interconnect-names = "mdp0-mem",
+                             "cpu-cfg";
 
         iommus = <&apps_smmu 0x900 0x402>;
         ranges;
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sdm670-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sdm670-mdss.yaml
new file mode 100644
index 000000000000..7dc269322b8e
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sdm670-mdss.yaml
@@ -0,0 +1,292 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/qcom,sdm670-mdss.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SDM670 Display MDSS
+
+maintainers:
+  - Richard Acayan <mailingradian@gmail.com>
+
+description:
+  SDM670 MSM Mobile Display Subsystem (MDSS), which encapsulates sub-blocks
+  like DPU display controller, DSI and DP interfaces etc.
+
+$ref: /schemas/display/msm/mdss-common.yaml#
+
+properties:
+  compatible:
+    const: qcom,sdm670-mdss
+
+  clocks:
+    items:
+      - description: Display AHB clock from gcc
+      - description: Display core clock
+
+  clock-names:
+    items:
+      - const: iface
+      - const: core
+
+  iommus:
+    maxItems: 2
+
+  interconnects:
+    maxItems: 2
+
+  interconnect-names:
+    maxItems: 2
+
+patternProperties:
+  "^display-controller@[0-9a-f]+$":
+    type: object
+    additionalProperties: true
+
+    properties:
+      compatible:
+        const: qcom,sdm670-dpu
+
+  "^displayport-controller@[0-9a-f]+$":
+    type: object
+    additionalProperties: true
+
+    properties:
+      compatible:
+        const: qcom,sdm670-dp
+
+  "^dsi@[0-9a-f]+$":
+    type: object
+    additionalProperties: true
+
+    properties:
+      compatible:
+        contains:
+          const: qcom,sdm670-dsi-ctrl
+
+  "^phy@[0-9a-f]+$":
+    type: object
+    additionalProperties: true
+
+    properties:
+      compatible:
+        const: qcom,dsi-phy-10nm
+
+required:
+  - compatible
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,dispcc-sdm845.h>
+    #include <dt-bindings/clock/qcom,gcc-sdm845.h>
+    #include <dt-bindings/clock/qcom,rpmh.h>
+    #include <dt-bindings/interconnect/qcom,sdm670-rpmh.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/qcom-rpmpd.h>
+
+    display-subsystem@ae00000 {
+        compatible = "qcom,sdm670-mdss";
+        reg = <0x0ae00000 0x1000>;
+        reg-names = "mdss";
+        power-domains = <&dispcc MDSS_GDSC>;
+
+        clocks = <&gcc GCC_DISP_AHB_CLK>,
+                 <&dispcc DISP_CC_MDSS_MDP_CLK>;
+        clock-names = "iface", "core";
+
+        interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-controller;
+        #interrupt-cells = <1>;
+
+        interconnects = <&mmss_noc MASTER_MDP_PORT0 0 &mem_noc SLAVE_EBI_CH0 0>,
+                        <&mmss_noc MASTER_MDP_PORT1 0 &mem_noc SLAVE_EBI_CH0 0>;
+        interconnect-names = "mdp0-mem", "mdp1-mem";
+
+        iommus = <&apps_smmu 0x880 0x8>,
+                 <&apps_smmu 0xc80 0x8>;
+
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges;
+
+        display-controller@ae01000 {
+            compatible = "qcom,sdm670-dpu";
+            reg = <0x0ae01000 0x8f000>,
+                  <0x0aeb0000 0x2008>;
+            reg-names = "mdp", "vbif";
+
+            clocks = <&gcc GCC_DISP_AXI_CLK>,
+                     <&dispcc DISP_CC_MDSS_AHB_CLK>,
+                     <&dispcc DISP_CC_MDSS_AXI_CLK>,
+                     <&dispcc DISP_CC_MDSS_MDP_CLK>,
+                     <&dispcc DISP_CC_MDSS_VSYNC_CLK>;
+            clock-names = "gcc-bus", "iface", "bus", "core", "vsync";
+
+            interrupt-parent = <&mdss>;
+            interrupts = <0>;
+            power-domains = <&rpmhpd SDM670_CX>;
+            operating-points-v2 = <&mdp_opp_table>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@0 {
+                    reg = <0>;
+                    dpu_intf1_out: endpoint {
+                        remote-endpoint = <&mdss_dsi0_in>;
+                    };
+                };
+
+                port@1 {
+                    reg = <1>;
+                    dpu_intf2_out: endpoint {
+                        remote-endpoint = <&mdss_dsi1_in>;
+                    };
+                };
+            };
+        };
+
+        dsi@ae94000 {
+            compatible = "qcom,sdm670-dsi-ctrl", "qcom,mdss-dsi-ctrl";
+            reg = <0x0ae94000 0x400>;
+            reg-names = "dsi_ctrl";
+
+            interrupt-parent = <&mdss>;
+            interrupts = <4>;
+
+            clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK>,
+                     <&dispcc DISP_CC_MDSS_BYTE0_INTF_CLK>,
+                     <&dispcc DISP_CC_MDSS_PCLK0_CLK>,
+                     <&dispcc DISP_CC_MDSS_ESC0_CLK>,
+                     <&dispcc DISP_CC_MDSS_AHB_CLK>,
+                     <&dispcc DISP_CC_MDSS_AXI_CLK>;
+            clock-names = "byte",
+                          "byte_intf",
+                          "pixel",
+                          "core",
+                          "iface",
+                          "bus";
+            assigned-clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK_SRC>,
+                              <&dispcc DISP_CC_MDSS_PCLK0_CLK_SRC>;
+            assigned-clock-parents = <&mdss_dsi0_phy 0>, <&mdss_dsi0_phy 1>;
+
+            operating-points-v2 = <&dsi_opp_table>;
+            power-domains = <&rpmhpd SDM670_CX>;
+
+            phys = <&mdss_dsi0_phy>;
+            phy-names = "dsi";
+
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@0 {
+                    reg = <0>;
+                    mdss_dsi0_in: endpoint {
+                        remote-endpoint = <&dpu_intf1_out>;
+                    };
+                };
+
+                port@1 {
+                    reg = <1>;
+                    mdss_dsi0_out: endpoint {
+                    };
+                };
+            };
+        };
+
+        mdss_dsi0_phy: phy@ae94400 {
+            compatible = "qcom,dsi-phy-10nm";
+            reg = <0x0ae94400 0x200>,
+                  <0x0ae94600 0x280>,
+                  <0x0ae94a00 0x1e0>;
+            reg-names = "dsi_phy",
+                        "dsi_phy_lane",
+                        "dsi_pll";
+
+            #clock-cells = <1>;
+            #phy-cells = <0>;
+
+            clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+                     <&rpmhcc RPMH_CXO_CLK>;
+            clock-names = "iface", "ref";
+            vdds-supply = <&vreg_dsi_phy>;
+        };
+
+        dsi@ae96000 {
+            compatible = "qcom,sdm670-dsi-ctrl", "qcom,mdss-dsi-ctrl";
+            reg = <0x0ae96000 0x400>;
+            reg-names = "dsi_ctrl";
+
+            interrupt-parent = <&mdss>;
+            interrupts = <5>;
+
+            clocks = <&dispcc DISP_CC_MDSS_BYTE1_CLK>,
+                     <&dispcc DISP_CC_MDSS_BYTE1_INTF_CLK>,
+                     <&dispcc DISP_CC_MDSS_PCLK1_CLK>,
+                     <&dispcc DISP_CC_MDSS_ESC1_CLK>,
+                     <&dispcc DISP_CC_MDSS_AHB_CLK>,
+                     <&dispcc DISP_CC_MDSS_AXI_CLK>;
+            clock-names = "byte",
+                          "byte_intf",
+                          "pixel",
+                          "core",
+                          "iface",
+                          "bus";
+            assigned-clocks = <&dispcc DISP_CC_MDSS_BYTE1_CLK_SRC>,
+                              <&dispcc DISP_CC_MDSS_PCLK1_CLK_SRC>;
+            assigned-clock-parents = <&dsi1_phy 0>, <&dsi1_phy 1>;
+
+            operating-points-v2 = <&dsi_opp_table>;
+            power-domains = <&rpmhpd SDM670_CX>;
+
+            phys = <&dsi1_phy>;
+            phy-names = "dsi";
+
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@0 {
+                    reg = <0>;
+                    mdss_dsi1_in: endpoint {
+                        remote-endpoint = <&dpu_intf2_out>;
+                    };
+                };
+
+                port@1 {
+                    reg = <1>;
+                    mdss_dsi1_out: endpoint {
+                    };
+                };
+            };
+        };
+
+        mdss_dsi1_phy: phy@ae96400 {
+            compatible = "qcom,dsi-phy-10nm";
+            reg = <0x0ae96400 0x200>,
+                  <0x0ae96600 0x280>,
+                  <0x0ae96a00 0x10e>;
+            reg-names = "dsi_phy",
+                        "dsi_phy_lane",
+                        "dsi_pll";
+
+            #clock-cells = <1>;
+            #phy-cells = <0>;
+
+            clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+                     <&rpmhcc RPMH_CXO_CLK>;
+            clock-names = "iface", "ref";
+            vdds-supply = <&vreg_dsi_phy>;
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sdm845-dpu.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sdm845-dpu.yaml
index b917064bdf33..dc11fd421a27 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sdm845-dpu.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sdm845-dpu.yaml
@@ -13,7 +13,9 @@ $ref: /schemas/display/msm/dpu-common.yaml#
 
 properties:
   compatible:
-    const: qcom,sdm845-dpu
+    enum:
+      - qcom,sdm670-dpu
+      - qcom,sdm845-dpu
 
   reg:
     items:
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml
index dde5c2acead5..309de1953c88 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml
@@ -29,6 +29,16 @@ properties:
   iommus:
     maxItems: 2
 
+  interconnects:
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
+
+  interconnect-names:
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
+
 patternProperties:
   "^display-controller@[0-9a-f]+$":
     type: object
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6125-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6125-mdss.yaml
index 671c2c2aa896..3deb9dc81c9c 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm6125-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6125-mdss.yaml
@@ -35,10 +35,14 @@ properties:
     maxItems: 1
 
   interconnects:
-    maxItems: 2
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    maxItems: 2
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml
index e1dcb453762e..c9ba1fae8042 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml
@@ -35,10 +35,14 @@ properties:
     maxItems: 1
 
   interconnects:
-    maxItems: 2
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    maxItems: 2
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml
index b15c3950f09d..8e8a288d318c 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml
@@ -35,10 +35,14 @@ properties:
     maxItems: 1
 
   interconnects:
-    maxItems: 2
+    items:
+      - description: Interconnect path from mdp0 port to the data bus
+      - description: Interconnect path from CPU to the reg bus
 
   interconnect-names:
-    maxItems: 2
+    items:
+      - const: mdp0-mem
+      - const: cpu-cfg
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
index a2a8be7f64a9..c0d6a4fdff97 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
@@ -69,7 +69,7 @@ patternProperties:
 
     properties:
       compatible:
-        const: qcom,dsi-phy-7nm
+        const: qcom,dsi-phy-7nm-8150
 
 unevaluatedProperties: false
 
@@ -247,7 +247,7 @@ examples:
         };
 
         dsi0_phy: phy@ae94400 {
-            compatible = "qcom,dsi-phy-7nm";
+            compatible = "qcom,dsi-phy-7nm-8150";
             reg = <0x0ae94400 0x200>,
                   <0x0ae94600 0x280>,
                   <0x0ae94900 0x260>;
@@ -318,7 +318,7 @@ examples:
         };
 
         dsi1_phy: phy@ae96400 {
-            compatible = "qcom,dsi-phy-7nm";
+            compatible = "qcom,dsi-phy-7nm-8150";
             reg = <0x0ae96400 0x200>,
                   <0x0ae96600 0x280>,
                   <0x0ae96900 0x260>;
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8250-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8250-mdss.yaml
index 994975909fea..51368cda7b2f 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm8250-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8250-mdss.yaml
@@ -52,6 +52,16 @@ patternProperties:
       compatible:
         const: qcom,sm8250-dpu
 
+  "^displayport-controller@[0-9a-f]+$":
+    type: object
+    additionalProperties: true
+
+    properties:
+      compatible:
+        items:
+          - const: qcom,sm8250-dp
+          - const: qcom,sm8350-dp
+
   "^dsi@[0-9a-f]+$":
     type: object
     additionalProperties: true
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8450-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8450-mdss.yaml
index 001b26e65301..747a2e9665f4 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm8450-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8450-mdss.yaml
@@ -30,10 +30,10 @@ properties:
     maxItems: 1
 
   interconnects:
-    maxItems: 2
+    maxItems: 3
 
   interconnect-names:
-    maxItems: 2
+    maxItems: 3
 
 patternProperties:
   "^display-controller@[0-9a-f]+$":
@@ -91,9 +91,12 @@ examples:
         reg = <0x0ae00000 0x1000>;
         reg-names = "mdss";
 
-        interconnects = <&mmss_noc MASTER_MDP_DISP 0 &mc_virt SLAVE_EBI1_DISP 0>,
-                        <&mmss_noc MASTER_MDP_DISP 0 &mc_virt SLAVE_EBI1_DISP 0>;
-        interconnect-names = "mdp0-mem", "mdp1-mem";
+        interconnects = <&mmss_noc MASTER_MDP_DISP &mc_virt SLAVE_EBI1_DISP>,
+                        <&mmss_noc MASTER_MDP_DISP &mc_virt SLAVE_EBI1_DISP>,
+                        <&gem_noc MASTER_APPSS_PROC &config_noc SLAVE_DISPLAY_CFG>;
+        interconnect-names = "mdp0-mem",
+                             "mdp1-mem",
+                             "cpu-cfg";
 
         resets = <&dispcc DISP_CC_MDSS_CORE_BCR>;
 
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml
new file mode 100644
index 000000000000..a01d15a03317
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/qcom,sm8650-dpu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SM8650 Display DPU
+
+maintainers:
+  - Neil Armstrong <neil.armstrong@linaro.org>
+
+$ref: /schemas/display/msm/dpu-common.yaml#
+
+properties:
+  compatible:
+    const: qcom,sm8650-dpu
+
+  reg:
+    items:
+      - description: Address offset and size for mdp register set
+      - description: Address offset and size for vbif register set
+
+  reg-names:
+    items:
+      - const: mdp
+      - const: vbif
+
+  clocks:
+    items:
+      - description: Display hf axi
+      - description: Display MDSS ahb
+      - description: Display lut
+      - description: Display core
+      - description: Display vsync
+
+  clock-names:
+    items:
+      - const: nrt_bus
+      - const: iface
+      - const: lut
+      - const: core
+      - const: vsync
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - clocks
+  - clock-names
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/qcom,rpmhpd.h>
+
+    display-controller@ae01000 {
+        compatible = "qcom,sm8650-dpu";
+        reg = <0x0ae01000 0x8f000>,
+              <0x0aeb0000 0x2008>;
+        reg-names = "mdp", "vbif";
+
+        clocks = <&gcc_axi_clk>,
+                 <&dispcc_ahb_clk>,
+                 <&dispcc_mdp_lut_clk>,
+                 <&dispcc_mdp_clk>,
+                 <&dispcc_vsync_clk>;
+        clock-names = "nrt_bus",
+                      "iface",
+                      "lut",
+                      "core",
+                      "vsync";
+
+        assigned-clocks = <&dispcc_vsync_clk>;
+        assigned-clock-rates = <19200000>;
+
+        operating-points-v2 = <&mdp_opp_table>;
+        power-domains = <&rpmhpd RPMHPD_MMCX>;
+
+        interrupt-parent = <&mdss>;
+        interrupts = <0>;
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            port@0 {
+                reg = <0>;
+                dpu_intf1_out: endpoint {
+                    remote-endpoint = <&dsi0_in>;
+                };
+            };
+
+            port@1 {
+                reg = <1>;
+                dpu_intf2_out: endpoint {
+                    remote-endpoint = <&dsi1_in>;
+                };
+            };
+        };
+
+        mdp_opp_table: opp-table {
+            compatible = "operating-points-v2";
+
+            opp-200000000 {
+                opp-hz = /bits/ 64 <200000000>;
+                required-opps = <&rpmhpd_opp_low_svs>;
+            };
+
+            opp-325000000 {
+                opp-hz = /bits/ 64 <325000000>;
+                required-opps = <&rpmhpd_opp_svs>;
+            };
+
+            opp-375000000 {
+                opp-hz = /bits/ 64 <375000000>;
+                required-opps = <&rpmhpd_opp_svs_l1>;
+            };
+
+            opp-514000000 {
+                opp-hz = /bits/ 64 <514000000>;
+                required-opps = <&rpmhpd_opp_nom>;
+            };
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml
new file mode 100644
index 000000000000..bd11119dc93d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml
@@ -0,0 +1,328 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/msm/qcom,sm8650-mdss.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SM8650 Display MDSS
+
+maintainers:
+  - Neil Armstrong <neil.armstrong@linaro.org>
+
+description:
+  SM8650 MSM Mobile Display Subsystem(MDSS), which encapsulates sub-blocks like
+  DPU display controller, DSI and DP interfaces etc.
+
+$ref: /schemas/display/msm/mdss-common.yaml#
+
+properties:
+  compatible:
+    const: qcom,sm8650-mdss
+
+  clocks:
+    items:
+      - description: Display AHB
+      - description: Display hf AXI
+      - description: Display core
+
+  iommus:
+    maxItems: 1
+
+  interconnects:
+    maxItems: 2
+
+  interconnect-names:
+    maxItems: 2
+
+patternProperties:
+  "^display-controller@[0-9a-f]+$":
+    type: object
+    properties:
+      compatible:
+        const: qcom,sm8650-dpu
+
+  "^displayport-controller@[0-9a-f]+$":
+    type: object
+    properties:
+      compatible:
+        const: qcom,sm8650-dp
+
+  "^dsi@[0-9a-f]+$":
+    type: object
+    properties:
+      compatible:
+        items:
+          - const: qcom,sm8650-dsi-ctrl
+          - const: qcom,mdss-dsi-ctrl
+
+  "^phy@[0-9a-f]+$":
+    type: object
+    properties:
+      compatible:
+        const: qcom,sm8650-dsi-phy-4nm
+
+required:
+  - compatible
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,rpmh.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/qcom,rpmhpd.h>
+
+    display-subsystem@ae00000 {
+        compatible = "qcom,sm8650-mdss";
+        reg = <0x0ae00000 0x1000>;
+        reg-names = "mdss";
+
+        resets = <&dispcc_core_bcr>;
+
+        power-domains = <&dispcc_gdsc>;
+
+        clocks = <&gcc_ahb_clk>,
+                 <&gcc_axi_clk>,
+                 <&dispcc_mdp_clk>;
+        clock-names = "bus", "nrt_bus", "core";
+
+        interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-controller;
+        #interrupt-cells = <1>;
+
+        iommus = <&apps_smmu 0x1c00 0x2>;
+
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges;
+
+        display-controller@ae01000 {
+            compatible = "qcom,sm8650-dpu";
+            reg = <0x0ae01000 0x8f000>,
+                  <0x0aeb0000 0x2008>;
+            reg-names = "mdp", "vbif";
+
+            clocks = <&gcc_axi_clk>,
+                     <&dispcc_ahb_clk>,
+                     <&dispcc_mdp_lut_clk>,
+                     <&dispcc_mdp_clk>,
+                     <&dispcc_mdp_vsync_clk>;
+            clock-names = "nrt_bus",
+                          "iface",
+                          "lut",
+                          "core",
+                          "vsync";
+
+            assigned-clocks = <&dispcc_mdp_vsync_clk>;
+            assigned-clock-rates = <19200000>;
+
+            operating-points-v2 = <&mdp_opp_table>;
+            power-domains = <&rpmhpd RPMHPD_MMCX>;
+
+            interrupt-parent = <&mdss>;
+            interrupts = <0>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@0 {
+                    reg = <0>;
+                    dpu_intf1_out: endpoint {
+                        remote-endpoint = <&dsi0_in>;
+                    };
+                };
+
+                port@1 {
+                    reg = <1>;
+                    dpu_intf2_out: endpoint {
+                        remote-endpoint = <&dsi1_in>;
+                    };
+                };
+            };
+
+            mdp_opp_table: opp-table {
+                compatible = "operating-points-v2";
+
+                opp-200000000 {
+                    opp-hz = /bits/ 64 <200000000>;
+                    required-opps = <&rpmhpd_opp_low_svs>;
+                };
+
+                opp-325000000 {
+                    opp-hz = /bits/ 64 <325000000>;
+                    required-opps = <&rpmhpd_opp_svs>;
+                };
+
+                opp-375000000 {
+                    opp-hz = /bits/ 64 <375000000>;
+                    required-opps = <&rpmhpd_opp_svs_l1>;
+                };
+
+                opp-514000000 {
+                    opp-hz = /bits/ 64 <514000000>;
+                    required-opps = <&rpmhpd_opp_nom>;
+                };
+            };
+        };
+
+        dsi@ae94000 {
+            compatible = "qcom,sm8650-dsi-ctrl", "qcom,mdss-dsi-ctrl";
+            reg = <0x0ae94000 0x400>;
+            reg-names = "dsi_ctrl";
+
+            interrupt-parent = <&mdss>;
+            interrupts = <4>;
+
+            clocks = <&dispc_byte_clk>,
+                     <&dispcc_intf_clk>,
+                     <&dispcc_pclk>,
+                     <&dispcc_esc_clk>,
+                     <&dispcc_ahb_clk>,
+                     <&gcc_bus_clk>;
+            clock-names = "byte",
+                          "byte_intf",
+                          "pixel",
+                          "core",
+                          "iface",
+                          "bus";
+
+            assigned-clocks = <&dispcc_byte_clk>,
+                              <&dispcc_pclk>;
+            assigned-clock-parents = <&dsi0_phy 0>, <&dsi0_phy 1>;
+
+            operating-points-v2 = <&dsi_opp_table>;
+            power-domains = <&rpmhpd RPMHPD_MMCX>;
+
+            phys = <&dsi0_phy>;
+            phy-names = "dsi";
+
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@0 {
+                    reg = <0>;
+                    dsi0_in: endpoint {
+                        remote-endpoint = <&dpu_intf1_out>;
+                    };
+                };
+
+                port@1 {
+                    reg = <1>;
+                    dsi0_out: endpoint {
+                    };
+                };
+            };
+
+            dsi_opp_table: opp-table {
+                compatible = "operating-points-v2";
+
+                opp-187500000 {
+                    opp-hz = /bits/ 64 <187500000>;
+                    required-opps = <&rpmhpd_opp_low_svs>;
+                };
+
+                opp-300000000 {
+                    opp-hz = /bits/ 64 <300000000>;
+                    required-opps = <&rpmhpd_opp_svs>;
+                };
+
+                opp-358000000 {
+                    opp-hz = /bits/ 64 <358000000>;
+                    required-opps = <&rpmhpd_opp_svs_l1>;
+                };
+            };
+        };
+
+        dsi0_phy: phy@ae94400 {
+            compatible = "qcom,sm8650-dsi-phy-4nm";
+            reg = <0x0ae95000 0x200>,
+                  <0x0ae95200 0x280>,
+                  <0x0ae95500 0x400>;
+            reg-names = "dsi_phy",
+                        "dsi_phy_lane",
+                        "dsi_pll";
+
+            #clock-cells = <1>;
+            #phy-cells = <0>;
+
+            clocks = <&dispcc_iface_clk>,
+                     <&rpmhcc_ref_clk>;
+            clock-names = "iface", "ref";
+        };
+
+        dsi@ae96000 {
+            compatible = "qcom,sm8650-dsi-ctrl", "qcom,mdss-dsi-ctrl";
+            reg = <0x0ae96000 0x400>;
+            reg-names = "dsi_ctrl";
+
+            interrupt-parent = <&mdss>;
+            interrupts = <5>;
+
+            clocks = <&dispc_byte_clk>,
+                     <&dispcc_intf_clk>,
+                     <&dispcc_pclk>,
+                     <&dispcc_esc_clk>,
+                     <&dispcc_ahb_clk>,
+                     <&gcc_bus_clk>;
+            clock-names = "byte",
+                          "byte_intf",
+                          "pixel",
+                          "core",
+                          "iface",
+                          "bus";
+
+            assigned-clocks = <&dispcc_byte_clk>,
+                              <&dispcc_pclk>;
+            assigned-clock-parents = <&dsi1_phy 0>, <&dsi1_phy 1>;
+
+            operating-points-v2 = <&dsi_opp_table>;
+            power-domains = <&rpmhpd RPMHPD_MMCX>;
+
+            phys = <&dsi1_phy>;
+            phy-names = "dsi";
+
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@0 {
+                    reg = <0>;
+                    dsi1_in: endpoint {
+                        remote-endpoint = <&dpu_intf2_out>;
+                    };
+                };
+
+                port@1 {
+                    reg = <1>;
+                    dsi1_out: endpoint {
+                    };
+                };
+            };
+        };
+
+        dsi1_phy: phy@ae96400 {
+            compatible = "qcom,sm8650-dsi-phy-4nm";
+            reg = <0x0ae97000 0x200>,
+                  <0x0ae97200 0x280>,
+                  <0x0ae97500 0x400>;
+            reg-names = "dsi_phy",
+                        "dsi_phy_lane",
+                        "dsi_pll";
+
+            #clock-cells = <1>;
+            #phy-cells = <0>;
+
+            clocks = <&dispcc_iface_clk>,
+                     <&rpmhcc_ref_clk>;
+            clock-names = "iface", "ref";
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml
index 509d20c091af..6a206111d4e0 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml
@@ -62,6 +62,9 @@ properties:
         - description: MPM pin number
         - description: GIC SPI number for the MPM pin
 
+  '#power-domain-cells':
+    const: 0
+
 required:
   - compatible
   - reg
@@ -93,4 +96,5 @@ examples:
                            <86 183>,
                            <90 260>,
                            <91 260>;
+        #power-domain-cells = <0>;
     };
diff --git a/Documentation/devicetree/bindings/perf/riscv,pmu.yaml b/Documentation/devicetree/bindings/perf/riscv,pmu.yaml
index c8448de2f2a0..d01c677ad3c7 100644
--- a/Documentation/devicetree/bindings/perf/riscv,pmu.yaml
+++ b/Documentation/devicetree/bindings/perf/riscv,pmu.yaml
@@ -90,7 +90,7 @@ properties:
             bitmap of all MHPMCOUNTERx that can monitor the range of events
 
 dependencies:
-  "riscv,event-to-mhpmevent": [ "riscv,event-to-mhpmcounters" ]
+  riscv,event-to-mhpmevent: [ "riscv,event-to-mhpmcounters" ]
 
 required:
   - compatible
diff --git a/Documentation/devicetree/bindings/pinctrl/nxp,s32g2-siul2-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/nxp,s32g2-siul2-pinctrl.yaml
index d49aafd8c5f4..a24286e4def6 100644
--- a/Documentation/devicetree/bindings/pinctrl/nxp,s32g2-siul2-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/nxp,s32g2-siul2-pinctrl.yaml
@@ -9,7 +9,7 @@ title: NXP S32G2 pin controller
 
 maintainers:
   - Ghennadi Procopciuc <Ghennadi.Procopciuc@oss.nxp.com>
-  - Chester Lin <clin@suse.com>
+  - Chester Lin <chester62515@gmail.com>
 
 description: |
   S32G2 pinmux is implemented in SIUL2 (System Integration Unit Lite2),
diff --git a/Documentation/devicetree/bindings/pwm/imx-pwm.yaml b/Documentation/devicetree/bindings/pwm/imx-pwm.yaml
index c01dff3b7f84..a84a240a61dc 100644
--- a/Documentation/devicetree/bindings/pwm/imx-pwm.yaml
+++ b/Documentation/devicetree/bindings/pwm/imx-pwm.yaml
@@ -14,12 +14,10 @@ allOf:
 
 properties:
   "#pwm-cells":
-    description: |
-      Should be 2 for i.MX1 and 3 for i.MX27 and newer SoCs. See pwm.yaml
-      in this directory for a description of the cells format.
-    enum:
-      - 2
-      - 3
+    description:
+      The only third cell flag supported by this binding is
+      PWM_POLARITY_INVERTED. fsl,imx1-pwm does not support this flags.
+    const: 3
 
   compatible:
     oneOf:
diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml
index e4fa6a07b4fa..1309bf5ae0cd 100644
--- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml
+++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml
@@ -233,6 +233,7 @@ allOf:
               - rockchip,rk3399-grf
               - rockchip,rk3399-pmugrf
               - rockchip,rk3568-pmugrf
+              - rockchip,rk3588-pmugrf
               - rockchip,rv1108-grf
               - rockchip,rv1108-pmugrf
 
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index f549a68951d7..8bc4ebe7a36f 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -115,6 +115,7 @@ available subsections can be seen below.
    hte/index
    wmi
    dpll
+   wbrf
 
 .. only::  subproject and html
 
diff --git a/Documentation/driver-api/wbrf.rst b/Documentation/driver-api/wbrf.rst
new file mode 100644
index 000000000000..f48bfa029813
--- /dev/null
+++ b/Documentation/driver-api/wbrf.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+=================================
+WBRF - Wifi Band RFI Mitigations
+=================================
+
+Due to electrical and mechanical constraints in certain platform designs
+there may be likely interference of relatively high-powered harmonics of
+the GPU memory clocks with local radio module frequency bands used by
+certain Wifi bands.
+
+To mitigate possible RFI interference producers can advertise the
+frequencies in use and consumers can use this information to avoid using
+these frequencies for sensitive features.
+
+When a platform is known to have this issue with any contained devices,
+the platform designer will advertise the availability of this feature via
+ACPI devices with a device specific method (_DSM).
+* Producers with this _DSM will be able to advertise the frequencies in use.
+* Consumers with this _DSM will be able to register for notifications of
+frequencies in use.
+
+Some general terms
+==================
+
+Producer: such component who can produce high-powered radio frequency
+Consumer: such component who can adjust its in-use frequency in
+response to the radio frequencies of other components to mitigate the
+possible RFI.
+
+To make the mechanism function, those producers should notify active use
+of their particular frequencies so that other consumers can make relative
+internal adjustments as necessary to avoid this resonance.
+
+ACPI interface
+==============
+
+Although initially used by for wifi + dGPU use cases, the ACPI interface
+can be scaled to any type of device that a platform designer discovers
+can cause interference.
+
+The GUID used for the _DSM is 7B7656CF-DC3D-4C1C-83E9-66E721DE3070.
+
+3 functions are available in this _DSM:
+
+* 0: discover # of functions available
+* 1: record RF bands in use
+* 2: retrieve RF bands in use
+
+Driver programming interface
+============================
+
+.. kernel-doc:: drivers/platform/x86/amd/wbrf.c
+
+Sample Usage
+=============
+
+The expected flow for the producers:
+1. During probe, call `acpi_amd_wbrf_supported_producer` to check if WBRF
+can be enabled for the device.
+2. On using some frequency band, call `acpi_amd_wbrf_add_remove` with 'add'
+param to get other consumers properly notified.
+3. Or on stopping using some frequency band, call
+`acpi_amd_wbrf_add_remove` with 'remove' param to get other consumers notified.
+
+The expected flow for the consumers:
+1. During probe, call `acpi_amd_wbrf_supported_consumer` to check if WBRF
+can be enabled for the device.
+2. Call `amd_wbrf_register_notifier` to register for notification
+of frequency band change(add or remove) from other producers.
+3. Call the `amd_wbrf_retrieve_freq_band` initally to retrieve
+current active frequency bands considering some producers may broadcast
+such information before the consumer is up.
+4. On receiving a notification for frequency band change, run
+`amd_wbrf_retrieve_freq_band` again to retrieve the latest
+active frequency bands.
+5. During driver cleanup, call `amd_wbrf_unregister_notifier` to
+unregister the notifier.
diff --git a/Documentation/gpu/amdgpu/apu-asic-info-table.csv b/Documentation/gpu/amdgpu/apu-asic-info-table.csv
index 2e76b427ba1e..18868abe2a91 100644
--- a/Documentation/gpu/amdgpu/apu-asic-info-table.csv
+++ b/Documentation/gpu/amdgpu/apu-asic-info-table.csv
@@ -7,6 +7,7 @@ SteamDeck, VANGOGH, DCN 3.0.1, 10.3.1, VCN 3.1.0, 5.2.1, 11.5.0
 Ryzen 5000 series / Ryzen 7x30 series, GREEN SARDINE / Cezanne / Barcelo / Barcelo-R, DCN 2.1, 9.3, VCN 2.2, 4.1.1, 12.0.1
 Ryzen 6000 series / Ryzen 7x35 series / Ryzen 7x36 series, YELLOW CARP / Rembrandt / Rembrandt-R, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3, 13.0.3
 Ryzen 7000 series (AM5), Raphael, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
-Ryzen 7x45 series (FL1), / Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
+Ryzen 7x45 series (FL1), Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
 Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8
-Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
-\ No newline at end of file
+Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
+Ryzen 8x40 series, Hawk Point, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
diff --git a/Documentation/gpu/amdgpu/display/dc-debug.rst b/Documentation/gpu/amdgpu/display/dc-debug.rst
index 40c55a618918..817631b1dbf3 100644
--- a/Documentation/gpu/amdgpu/display/dc-debug.rst
+++ b/Documentation/gpu/amdgpu/display/dc-debug.rst
@@ -75,3 +75,44 @@ change in real-time by using something like::
 
 When reporting a bug related to DC, consider attaching this log before and
 after you reproduce the bug.
+
+DMUB Firmware Debug
+===================
+
+Sometimes, dmesg logs aren't enough. This is especially true if a feature is
+implemented primarily in DMUB firmware. In such cases, all we see in dmesg when
+an issue arises is some generic timeout error. So, to get more relevant
+information, we can trace DMUB commands by enabling the relevant bits in
+`amdgpu_dm_dmub_trace_mask`.
+
+Currently, we support the tracing of the following groups:
+
+Trace Groups
+------------
+
+.. csv-table::
+   :header-rows: 1
+   :widths: 1, 1
+   :file: ./trace-groups-table.csv
+
+**Note: Not all ASICs support all of the listed trace groups**
+
+So, to enable just PSR tracing you can use the following command::
+
+  # echo 0x8020 > /sys/kernel/debug/dri/0/amdgpu_dm_dmub_trace_mask
+
+Then, you need to enable logging trace events to the buffer, which you can do
+using the following::
+
+  # echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
+
+Lastly, after you are able to reproduce the issue you are trying to debug,
+you can disable tracing and read the trace log by using the following::
+
+  # echo 0 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
+  # cat /sys/kernel/debug/dri/0/amdgpu_dm_dmub_tracebuffer
+
+So, when reporting bugs related to features such as PSR and ABM, consider
+enabling the relevant bits in the mask before reproducing the issue and
+attach the log that you obtain from the trace buffer in any bug reports that you
+create.
diff --git a/Documentation/gpu/amdgpu/display/trace-groups-table.csv b/Documentation/gpu/amdgpu/display/trace-groups-table.csv
new file mode 100644
index 000000000000..3f6a50d1d883
--- /dev/null
+++ b/Documentation/gpu/amdgpu/display/trace-groups-table.csv
@@ -0,0 +1,29 @@
+Name, Mask Value
+INFO, 0x1
+IRQ SVC, 0x2
+VBIOS, 0x4
+REGISTER, 0x8
+PHY DBG, 0x10
+PSR, 0x20
+AUX, 0x40
+SMU, 0x80
+MALL, 0x100
+ABM, 0x200
+ALPM, 0x400
+TIMER, 0x800
+HW LOCK MGR, 0x1000
+INBOX1, 0x2000
+PHY SEQ, 0x4000
+PSR STATE, 0x8000
+ZSTATE, 0x10000
+TRANSMITTER CTL, 0x20000
+PANEL CNTL, 0x40000
+FAMS, 0x80000
+DPIA, 0x100000
+SUBVP, 0x200000
+INBOX0, 0x400000
+SDP, 0x4000000
+REPLAY, 0x8000000
+REPLAY RESIDENCY, 0x20000000
+CURSOR INFO, 0x80000000
+IPS, 0x100000000
diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst
index c08bcbb95fb3..e5070a0e95ab 100644
--- a/Documentation/gpu/driver-uapi.rst
+++ b/Documentation/gpu/driver-uapi.rst
@@ -17,3 +17,8 @@ VM_BIND / EXEC uAPI
     :doc: Overview
 
 .. kernel-doc:: include/uapi/drm/nouveau_drm.h
+
+drm/xe uAPI
+===========
+
+.. kernel-doc:: include/uapi/drm/xe_drm.h
diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst
index cc6535f5f28c..b899cbc5c2b4 100644
--- a/Documentation/gpu/drivers.rst
+++ b/Documentation/gpu/drivers.rst
@@ -18,6 +18,7 @@ GPU Driver Documentation
    vkms
    bridge/dw-hdmi
    xen-front
+   xe/index
    afbc
    komeda-kms
    panfrost
diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst
new file mode 100644
index 000000000000..c224ecaee81e
--- /dev/null
+++ b/Documentation/gpu/xe/index.rst
@@ -0,0 +1,25 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=======================
+drm/xe Intel GFX Driver
+=======================
+
+The drm/xe driver supports some future GFX cards with rendering, display,
+compute and media. Support for currently available platforms like TGL, ADL,
+DG2, etc is provided to prototype the driver.
+
+.. toctree::
+   :titlesonly:
+
+   xe_mm
+   xe_map
+   xe_migrate
+   xe_cs
+   xe_pm
+   xe_pcode
+   xe_gt_mcr
+   xe_wa
+   xe_rtp
+   xe_firmware
+   xe_tile
+   xe_debugging
diff --git a/Documentation/gpu/xe/xe_cs.rst b/Documentation/gpu/xe/xe_cs.rst
new file mode 100644
index 000000000000..e379aed4f5a8
--- /dev/null
+++ b/Documentation/gpu/xe/xe_cs.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==================
+Command submission
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_exec.c
+   :doc: Execbuf (User GPU command submission)
diff --git a/Documentation/gpu/xe/xe_debugging.rst b/Documentation/gpu/xe/xe_debugging.rst
new file mode 100644
index 000000000000..d65e56ff3500
--- /dev/null
+++ b/Documentation/gpu/xe/xe_debugging.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=========
+Debugging
+=========
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_assert.h
diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst
new file mode 100644
index 000000000000..afcb561cd37d
--- /dev/null
+++ b/Documentation/gpu/xe/xe_firmware.rst
@@ -0,0 +1,37 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+========
+Firmware
+========
+
+Firmware Layout
+===============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h
+   :doc: CSS-based Firmware Layout
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h
+   :doc: GSC-based Firmware Layout
+
+Write Once Protected Content Memory (WOPCM) Layout
+==================================================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wopcm.c
+   :doc: Write Once Protected Content Memory (WOPCM) Layout
+
+GuC CTB Blob
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_ct.c
+   :doc: GuC CTB Blob
+
+GuC Power Conservation (PC)
+===========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_pc.c
+   :doc: GuC Power Conservation (PC)
+
+Internal API
+============
+
+TODO
diff --git a/Documentation/gpu/xe/xe_gt_mcr.rst b/Documentation/gpu/xe/xe_gt_mcr.rst
new file mode 100644
index 000000000000..848c07bc36d0
--- /dev/null
+++ b/Documentation/gpu/xe/xe_gt_mcr.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==============================================
+GT Multicast/Replicated (MCR) Register Support
+==============================================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_mcr.c
+   :doc: GT Multicast/Replicated (MCR) Register Support
+
+Internal API
+============
+
+TODO
diff --git a/Documentation/gpu/xe/xe_map.rst b/Documentation/gpu/xe/xe_map.rst
new file mode 100644
index 000000000000..a098cfd2df04
--- /dev/null
+++ b/Documentation/gpu/xe/xe_map.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=========
+Map Layer
+=========
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_map.h
+   :doc: Map layer
diff --git a/Documentation/gpu/xe/xe_migrate.rst b/Documentation/gpu/xe/xe_migrate.rst
new file mode 100644
index 000000000000..f92faec0ac94
--- /dev/null
+++ b/Documentation/gpu/xe/xe_migrate.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=============
+Migrate Layer
+=============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_migrate_doc.h
+   :doc: Migrate Layer
diff --git a/Documentation/gpu/xe/xe_mm.rst b/Documentation/gpu/xe/xe_mm.rst
new file mode 100644
index 000000000000..6c8fd8b4a466
--- /dev/null
+++ b/Documentation/gpu/xe/xe_mm.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=================
+Memory Management
+=================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_bo_doc.h
+   :doc: Buffer Objects (BO)
+
+Pagetable building
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pt.c
+   :doc: Pagetable building
diff --git a/Documentation/gpu/xe/xe_pcode.rst b/Documentation/gpu/xe/xe_pcode.rst
new file mode 100644
index 000000000000..d2e22cc45061
--- /dev/null
+++ b/Documentation/gpu/xe/xe_pcode.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=====
+Pcode
+=====
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
+   :doc: PCODE
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
+   :internal:
diff --git a/Documentation/gpu/xe/xe_pm.rst b/Documentation/gpu/xe/xe_pm.rst
new file mode 100644
index 000000000000..6781cdfb24f6
--- /dev/null
+++ b/Documentation/gpu/xe/xe_pm.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+========================
+Runtime Power Management
+========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c
+   :doc: Xe Power Management
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c
+   :internal:
diff --git a/Documentation/gpu/xe/xe_rtp.rst b/Documentation/gpu/xe/xe_rtp.rst
new file mode 100644
index 000000000000..7fdf4b6c1a04
--- /dev/null
+++ b/Documentation/gpu/xe/xe_rtp.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=========================
+Register Table Processing
+=========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c
+   :doc: Register Table Processing
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp_types.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c
+   :internal:
diff --git a/Documentation/gpu/xe/xe_tile.rst b/Documentation/gpu/xe/xe_tile.rst
new file mode 100644
index 000000000000..c33f68dd95b6
--- /dev/null
+++ b/Documentation/gpu/xe/xe_tile.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==================
+Multi-tile Devices
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_tile.c
+   :doc: Multi-tile Design
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_tile.c
+   :internal:
diff --git a/Documentation/gpu/xe/xe_wa.rst b/Documentation/gpu/xe/xe_wa.rst
new file mode 100644
index 000000000000..f8811cc6adcc
--- /dev/null
+++ b/Documentation/gpu/xe/xe_wa.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+====================
+Hardware workarounds
+====================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c
+   :doc: Hardware workarounds
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c
+   :internal:
diff --git a/Documentation/networking/tcp_ao.rst b/Documentation/networking/tcp_ao.rst
index cfa5bf1cc542..8a58321acce7 100644
--- a/Documentation/networking/tcp_ao.rst
+++ b/Documentation/networking/tcp_ao.rst
@@ -99,7 +99,7 @@ also [6.1]::
    when it is no longer considered permitted.
 
 Linux TCP-AO will try its best to prevent you from removing a key that's
-being used, considering it a key management failure. But sine keeping
+being used, considering it a key management failure. But since keeping
 an outdated key may become a security issue and as a peer may
 unintentionally prevent the removal of an old key by always setting
 it as RNextKeyID - a forced key removal mechanism is provided, where
diff --git a/Documentation/trace/coresight/coresight.rst b/Documentation/trace/coresight/coresight.rst
index 4a71ea6cb390..826e59a698da 100644
--- a/Documentation/trace/coresight/coresight.rst
+++ b/Documentation/trace/coresight/coresight.rst
@@ -130,7 +130,7 @@ Misc:
 Device Tree Bindings
 --------------------
 
-See Documentation/devicetree/bindings/arm/arm,coresight-\*.yaml for details.
+See ``Documentation/devicetree/bindings/arm/arm,coresight-*.yaml`` for details.
 
 As of this writing drivers for ITM, STMs and CTIs are not provided but are
 expected to be added as the solution matures.
diff --git a/MAINTAINERS b/MAINTAINERS
index f2cee321099f..d7e82248f068 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -171,13 +171,10 @@ S:	Supported
 F:	drivers/soc/fujitsu/a64fx-diag.c
 
 A8293 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/dvb-frontends/a8293*
 
 AACRAID SCSI RAID DRIVER
@@ -576,23 +573,17 @@ F:	drivers/iio/accel/adxl372_i2c.c
 F:	drivers/iio/accel/adxl372_spi.c
 
 AF9013 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/dvb-frontends/af9013*
 
 AF9033 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/dvb-frontends/af9033*
 
 AFFS FILE SYSTEM
@@ -650,13 +641,10 @@ F:	fs/aio.c
 F:	include/linux/*aio*.h
 
 AIRSPY MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/usb/airspy/
 
 ALACRITECH GIGABIT ETHERNET DRIVER
@@ -2155,6 +2143,7 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git
 F:	arch/arm/boot/dts/nxp/imx/
 F:	arch/arm/boot/dts/nxp/mxs/
+F:	arch/arm64/boot/dts/freescale/
 X:	arch/arm64/boot/dts/freescale/fsl-*
 X:	arch/arm64/boot/dts/freescale/qoriq-*
 X:	drivers/media/i2c/
@@ -2535,7 +2524,7 @@ F:	drivers/*/*/*wpcm*
 F:	drivers/*/*wpcm*
 
 ARM/NXP S32G ARCHITECTURE
-M:	Chester Lin <clin@suse.com>
+M:	Chester Lin <chester62515@gmail.com>
 R:	Andreas Färber <afaerber@suse.de>
 R:	Matthias Brugger <mbrugger@suse.com>
 R:	NXP S32 Linux Team <s32@nxp.com>
@@ -5076,7 +5065,6 @@ CLANG CONTROL FLOW INTEGRITY SUPPORT
 M:	Sami Tolvanen <samitolvanen@google.com>
 M:	Kees Cook <keescook@chromium.org>
 R:	Nathan Chancellor <nathan@kernel.org>
-R:	Nick Desaulniers <ndesaulniers@google.com>
 L:	llvm@lists.linux.dev
 S:	Supported
 B:	https://github.com/ClangBuiltLinux/linux/issues
@@ -5091,8 +5079,9 @@ F:	.clang-format
 
 CLANG/LLVM BUILD SUPPORT
 M:	Nathan Chancellor <nathan@kernel.org>
-M:	Nick Desaulniers <ndesaulniers@google.com>
-R:	Tom Rix <trix@redhat.com>
+R:	Nick Desaulniers <ndesaulniers@google.com>
+R:	Bill Wendling <morbo@google.com>
+R:	Justin Stitt <justinstitt@google.com>
 L:	llvm@lists.linux.dev
 S:	Supported
 W:	https://clangbuiltlinux.github.io/
@@ -5242,7 +5231,6 @@ F:	drivers/platform/x86/compal-laptop.c
 
 COMPILER ATTRIBUTES
 M:	Miguel Ojeda <ojeda@kernel.org>
-R:	Nick Desaulniers <ndesaulniers@google.com>
 S:	Maintained
 F:	include/linux/compiler_attributes.h
 
@@ -5605,13 +5593,10 @@ F:	Documentation/driver-api/media/drivers/cx88*
 F:	drivers/media/pci/cx88/
 
 CXD2820R MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/dvb-frontends/cxd2820r*
 
 CXGB3 ETHERNET DRIVER (CXGB3)
@@ -5724,13 +5709,10 @@ F:	Documentation/devicetree/bindings/input/cypress-sf.yaml
 F:	drivers/input/keyboard/cypress-sf.c
 
 CYPRESS_FIRMWARE MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/common/cypress_firmware*
 
 CYTTSP TOUCHSCREEN DRIVER
@@ -7330,53 +7312,38 @@ T:	git git://linuxtv.org/media_tree.git
 F:	drivers/media/pci/dt3155/
 
 DVB_USB_AF9015 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/usb/dvb-usb-v2/af9015*
 
 DVB_USB_AF9035 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/usb/dvb-usb-v2/af9035*
 
 DVB_USB_ANYSEE MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/usb/dvb-usb-v2/anysee*
 
 DVB_USB_AU6610 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/usb/dvb-usb-v2/au6610*
 
 DVB_USB_CE6230 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/usb/dvb-usb-v2/ce6230*
 
 DVB_USB_CXUSB MEDIA DRIVER
@@ -7390,22 +7357,17 @@ T:	git git://linuxtv.org/media_tree.git
 F:	drivers/media/usb/dvb-usb/cxusb*
 
 DVB_USB_EC168 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/usb/dvb-usb-v2/ec168*
 
 DVB_USB_GL861 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/usb/dvb-usb-v2/gl861*
 
 DVB_USB_MXL111SF MEDIA DRIVER
@@ -7419,23 +7381,18 @@ T:	git git://linuxtv.org/mkrufky/mxl111sf.git
 F:	drivers/media/usb/dvb-usb-v2/mxl111sf*
 
 DVB_USB_RTL28XXU MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/usb/dvb-usb-v2/rtl28xxu*
 
 DVB_USB_V2 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
 W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/usb/dvb-usb-v2/dvb_usb*
 F:	drivers/media/usb/dvb-usb-v2/usb_urb.c
 
@@ -7477,13 +7434,10 @@ F:	Documentation/devicetree/bindings/input/e3x0-button.txt
 F:	drivers/input/misc/e3x0-button.c
 
 E4000 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/tuners/e4000*
 
 EARTH_PT1 MEDIA DRIVER
@@ -7499,13 +7453,10 @@ S:	Odd Fixes
 F:	drivers/media/pci/pt3/
 
 EC100 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/dvb-frontends/ec100*
 
 ECRYPT FILE SYSTEM
@@ -8123,13 +8074,10 @@ F:	drivers/media/tuners/fc0011.c
 F:	drivers/media/tuners/fc0011.h
 
 FC2580 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/tuners/fc2580*
 
 FCOE SUBSYSTEM (libfc, libfcoe, fcoe)
@@ -9259,13 +9207,10 @@ F:	include/trace/events/habanalabs.h
 F:	include/uapi/drm/habanalabs_accel.h
 
 HACKRF MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/usb/hackrf/
 
 HANDSHAKE UPCALL FOR TRANSPORT LAYER SECURITY
@@ -9638,6 +9583,7 @@ F:	drivers/crypto/hisilicon/sgl.c
 F:	include/linux/hisi_acc_qm.h
 
 HISILICON ROCE DRIVER
+M:	Chengchang Tang <tangchengchang@huawei.com>
 M:	Junxian Huang <huangjunxian6@hisilicon.com>
 L:	linux-rdma@vger.kernel.org
 S:	Maintained
@@ -10653,7 +10599,17 @@ L:	linux-kernel@vger.kernel.org
 S:	Supported
 F:	arch/x86/include/asm/intel-family.h
 
-INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets)
+INTEL DRM DISPLAY FOR XE AND I915 DRIVERS
+M:	Jani Nikula <jani.nikula@linux.intel.com>
+M:	Rodrigo Vivi <rodrigo.vivi@intel.com>
+L:	intel-gfx@lists.freedesktop.org
+L:	intel-xe@lists.freedesktop.org
+S:	Supported
+F:	drivers/gpu/drm/i915/display/
+F:	drivers/gpu/drm/xe/display/
+F:	drivers/gpu/drm/xe/compat-i915-headers
+
+INTEL DRM I915 DRIVER (Meteor Lake, DG2 and older excluding Poulsbo, Moorestown and derivative)
 M:	Jani Nikula <jani.nikula@linux.intel.com>
 M:	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
 M:	Rodrigo Vivi <rodrigo.vivi@intel.com>
@@ -10672,6 +10628,23 @@ F:	drivers/gpu/drm/i915/
 F:	include/drm/i915*
 F:	include/uapi/drm/i915_drm.h
 
+INTEL DRM XE DRIVER (Lunar Lake and newer)
+M:	Lucas De Marchi <lucas.demarchi@intel.com>
+M:	Oded Gabbay <ogabbay@kernel.org>
+M:	Thomas Hellström <thomas.hellstrom@linux.intel.com>
+L:	intel-xe@lists.freedesktop.org
+S:	Supported
+W:	https://drm.pages.freedesktop.org/intel-docs/
+Q:	http://patchwork.freedesktop.org/project/intel-xe/
+B:	https://gitlab.freedesktop.org/drm/xe/kernel/-/issues
+C:	irc://irc.oftc.net/xe
+T:	git https://gitlab.freedesktop.org/drm/xe/kernel.git
+F:	Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
+F:	Documentation/gpu/xe/
+F:	drivers/gpu/drm/xe/
+F:	include/drm/xe*
+F:	include/uapi/drm/xe_drm.h
+
 INTEL ETHERNET DRIVERS
 M:	Jesse Brandeburg <jesse.brandeburg@intel.com>
 M:	Tony Nguyen <anthony.l.nguyen@intel.com>
@@ -11349,13 +11322,10 @@ F:	Documentation/hwmon/it87.rst
 F:	drivers/hwmon/it87.c
 
 IT913X MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/tuners/it913x*
 
 ITE IT66121 HDMI BRIDGE DRIVER
@@ -11537,7 +11507,6 @@ F:	fs/autofs/
 KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
 M:	Masahiro Yamada <masahiroy@kernel.org>
 R:	Nathan Chancellor <nathan@kernel.org>
-R:	Nick Desaulniers <ndesaulniers@google.com>
 R:	Nicolas Schier <nicolas@fjasle.eu>
 L:	linux-kbuild@vger.kernel.org
 S:	Maintained
@@ -12229,6 +12198,13 @@ F:	include/linux/nd.h
 F:	include/uapi/linux/ndctl.h
 F:	tools/testing/nvdimm/
 
+LIBRARY CODE
+M:	Andrew Morton <akpm@linux-foundation.org>
+L:	linux-kernel@vger.kernel.org
+S:	Supported
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-nonmm-unstable
+F:	lib/*
+
 LICENSES and SPDX stuff
 M:	Thomas Gleixner <tglx@linutronix.de>
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
@@ -12708,13 +12684,10 @@ W:	http://www.tazenda.demon.co.uk/phil/linux-hp
 F:	arch/m68k/hp300/
 
 M88DS3103 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/dvb-frontends/m88ds3103*
 
 M88RS2000 MEDIA DRIVER
@@ -14608,20 +14581,16 @@ F:	include/asm-generic/tlb.h
 F:	mm/mmu_gather.c
 
 MN88472 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 F:	drivers/media/dvb-frontends/mn88472*
 
 MN88473 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 F:	drivers/media/dvb-frontends/mn88473*
 
@@ -14709,23 +14678,17 @@ S:	Orphan
 F:	drivers/platform/x86/msi-wmi.c
 
 MSI001 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/tuners/msi001*
 
 MSI2500 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/usb/msi2500/
 
 MSTAR INTERRUPT CONTROLLER DRIVER
@@ -15089,6 +15052,7 @@ F:	lib/random32.c
 F:	net/
 F:	tools/net/
 F:	tools/testing/selftests/net/
+X:	net/9p/
 X:	net/bluetooth/
 
 NETWORKING [IPSEC]
@@ -17796,13 +17760,10 @@ F:	drivers/bus/fsl-mc/
 F:	include/uapi/linux/fsl_mc.h
 
 QT1010 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/tuners/qt1010*
 
 QUALCOMM ATH12K WIRELESS DRIVER
@@ -17969,6 +17930,8 @@ L:	iommu@lists.linux.dev
 L:	linux-arm-msm@vger.kernel.org
 S:	Maintained
 F:	drivers/iommu/arm/arm-smmu/qcom_iommu.c
+F:	drivers/iommu/arm/arm-smmu/arm-smmu-qcom*
+F:	drivers/iommu/msm_iommu*
 
 QUALCOMM IPC ROUTER (QRTR) DRIVER
 M:	Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
@@ -18855,33 +18818,24 @@ S:	Maintained
 F:	drivers/tty/rpmsg_tty.c
 
 RTL2830 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/dvb-frontends/rtl2830*
 
 RTL2832 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/dvb-frontends/rtl2832*
 
 RTL2832_SDR MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/dvb-frontends/rtl2832_sdr*
 
 RTL8180 WIRELESS DRIVER
@@ -19691,13 +19645,10 @@ F:	drivers/media/platform/renesas/sh_vou.c
 F:	include/media/drv-intf/sh_vou.h
 
 SI2157 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/tuners/si2157*
 
 SI2165 MEDIA DRIVER
@@ -19709,13 +19660,10 @@ Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 F:	drivers/media/dvb-frontends/si2165*
 
 SI2168 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/dvb-frontends/si2168*
 
 SI470X FM RADIO RECEIVER I2C DRIVER
@@ -21217,33 +21165,24 @@ W:	http://tcp-lp-mod.sourceforge.net/
 F:	net/ipv4/tcp_lp.c
 
 TDA10071 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/dvb-frontends/tda10071*
 
 TDA18212 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/tuners/tda18212*
 
 TDA18218 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/tuners/tda18218*
 
 TDA18250 MEDIA DRIVER
@@ -22179,13 +22118,10 @@ F:	include/uapi/linux/serial_core.h
 F:	include/uapi/linux/tty.h
 
 TUA9001 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
 Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/tuners/tua9001*
 
 TULIP NETWORK DRIVERS
@@ -24130,20 +24066,16 @@ S:	Orphan
 F:	drivers/net/wireless/zydas/zd1211rw/
 
 ZD1301 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org/
-W:	http://palosaari.fi/linux/
 Q:	https://patchwork.linuxtv.org/project/linux-media/list/
 F:	drivers/media/usb/dvb-usb-v2/zd1301*
 
 ZD1301_DEMOD MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
-S:	Maintained
+S:	Orphan
 W:	https://linuxtv.org/
-W:	http://palosaari.fi/linux/
 Q:	https://patchwork.linuxtv.org/project/linux-media/list/
 F:	drivers/media/dvb-frontends/zd1301_demod*
 
diff --git a/Makefile b/Makefile
index 99db546fbb45..70fc4c11dfc0 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 7
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc5
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts
index 1ab8184302db..5a2869a18bd5 100644
--- a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts
+++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts
@@ -36,9 +36,7 @@
 	gpios = <&gpio 42 GPIO_ACTIVE_HIGH>;
 };
 
-&leds {
-	/delete-node/ led_act;
-};
+/delete-node/ &led_act;
 
 &pm {
 	/delete-property/ system-power-controller;
diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts
index a3f247c722b4..0342a79ccd5d 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts
@@ -37,9 +37,9 @@
 
 &clks {
 	assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>,
-			  <&clks IMX6QDL_CLK_LDB_DI1_SEL>;
+			  <&clks IMX6QDL_CLK_LDB_DI1_SEL>, <&clks IMX6QDL_CLK_ENET_REF_SEL>;
 	assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>,
-				 <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>;
+				 <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, <&clk50m_phy>;
 };
 
 &hdmi {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi
index 4ffe99ed55ca..07dcecbe485d 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi
@@ -121,6 +121,8 @@
 			max-speed = <100>;
 			interrupt-parent = <&gpio5>;
 			interrupts = <6 IRQ_TYPE_LEVEL_LOW>;
+			clocks = <&clks IMX6UL_CLK_ENET_REF>;
+			clock-names = "rmii-ref";
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
index 29b8fd03567a..5387da8a2a0a 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
@@ -454,7 +454,7 @@
 			};
 
 			gpt1: timer@302d0000 {
-				compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
+				compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt";
 				reg = <0x302d0000 0x10000>;
 				interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clks IMX7D_GPT1_ROOT_CLK>,
@@ -463,7 +463,7 @@
 			};
 
 			gpt2: timer@302e0000 {
-				compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
+				compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt";
 				reg = <0x302e0000 0x10000>;
 				interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clks IMX7D_GPT2_ROOT_CLK>,
@@ -473,7 +473,7 @@
 			};
 
 			gpt3: timer@302f0000 {
-				compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
+				compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt";
 				reg = <0x302f0000 0x10000>;
 				interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clks IMX7D_GPT3_ROOT_CLK>,
@@ -483,7 +483,7 @@
 			};
 
 			gpt4: timer@30300000 {
-				compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
+				compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt";
 				reg = <0x30300000 0x10000>;
 				interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clks IMX7D_GPT4_ROOT_CLK>,
diff --git a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts
index a400c108f66a..6c5e6856648a 100644
--- a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts
+++ b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts
@@ -8,6 +8,7 @@
 #include "imx28-lwe.dtsi"
 
 / {
+	model = "Liebherr XEA board";
 	compatible = "lwn,imx28-xea", "fsl,imx28";
 };
 
diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi
index 7bf557c99561..01edf244ddee 100644
--- a/arch/arm/boot/dts/rockchip/rk3128.dtsi
+++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi
@@ -848,7 +848,7 @@
 			};
 
 			sdmmc_pwren: sdmmc-pwren {
-				rockchip,pins = <1 RK_PB6 1 &pcfg_pull_default>;
+				rockchip,pins = <1 RK_PB6 RK_FUNC_GPIO &pcfg_pull_default>;
 			};
 
 			sdmmc_bus4: sdmmc-bus4 {
diff --git a/arch/arm/boot/dts/rockchip/rk322x.dtsi b/arch/arm/boot/dts/rockchip/rk322x.dtsi
index ffc16d6b97e1..a721744cbfd1 100644
--- a/arch/arm/boot/dts/rockchip/rk322x.dtsi
+++ b/arch/arm/boot/dts/rockchip/rk322x.dtsi
@@ -215,9 +215,9 @@
 
 			power-domain@RK3228_PD_VOP {
 				reg = <RK3228_PD_VOP>;
-				clocks =<&cru ACLK_VOP>,
-					<&cru DCLK_VOP>,
-					<&cru HCLK_VOP>;
+				clocks = <&cru ACLK_VOP>,
+					 <&cru DCLK_VOP>,
+					 <&cru HCLK_VOP>;
 				pm_qos = <&qos_vop>;
 				#power-domain-cells = <0>;
 			};
diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h
index e62832dcba76..a8287e7ab9d4 100644
--- a/arch/arm/include/asm/kexec.h
+++ b/arch/arm/include/asm/kexec.h
@@ -2,8 +2,6 @@
 #ifndef _ARM_KEXEC_H
 #define _ARM_KEXEC_H
 
-#ifdef CONFIG_KEXEC
-
 /* Maximum physical address we can use pages from */
 #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
 /* Maximum address we can reach in physical address mode */
@@ -82,6 +80,4 @@ static inline struct page *boot_pfn_to_page(unsigned long boot_pfn)
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* CONFIG_KEXEC */
-
 #endif /* _ARM_KEXEC_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index d53f56d6f840..771264d4726a 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -59,7 +59,7 @@ obj-$(CONFIG_FUNCTION_TRACER)	+= entry-ftrace.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o insn.o patch.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o patch.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o insn.o patch.o
-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC_CORE)	+= machine_kexec.o relocate_kernel.o
 # Main staffs in KPROBES are in arch/arm/probes/ .
 obj-$(CONFIG_KPROBES)		+= patch.o insn.o
 obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index 2157493b78a9..df69af932375 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -501,6 +501,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b
 
 	name = devm_kasprintf(&pdev->dev,
 				GFP_KERNEL, "mmdc%d", ret);
+	if (!name) {
+		ret = -ENOMEM;
+		goto pmu_release_id;
+	}
 
 	pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk;
 	pmu_mmdc->devtype_data = (struct fsl_mmdc_devtype_data *)of_id->data;
@@ -523,9 +527,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b
 
 pmu_register_err:
 	pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret);
-	ida_simple_remove(&mmdc_ida, pmu_mmdc->id);
 	cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node);
 	hrtimer_cancel(&pmu_mmdc->hrtimer);
+pmu_release_id:
+	ida_simple_remove(&mmdc_ida, pmu_mmdc->id);
 pmu_free:
 	kfree(pmu_mmdc);
 	return ret;
diff --git a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi
index 5ce5fbf2b38e..f69b0c17560a 100644
--- a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi
@@ -82,12 +82,9 @@
 		pinctrl-0 = <&pinctrl_wifi_pdn>;
 		gpio = <&lsio_gpio1 28 GPIO_ACTIVE_HIGH>;
 		enable-active-high;
+		regulator-always-on;
 		regulator-name = "wifi_pwrdn_fake_regulator";
 		regulator-settling-time-us = <100>;
-
-		regulator-state-mem {
-			regulator-off-in-suspend;
-		};
 	};
 
 	reg_pcie_switch: regulator-pcie-switch {
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
index ce66d30a4839..b0bb77150adc 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
@@ -149,7 +149,7 @@ dma_subsys: bus@5a000000 {
 		clock-names = "ipg", "per";
 		assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>;
 		assigned-clock-rates = <24000000>;
-		#pwm-cells = <2>;
+		#pwm-cells = <3>;
 		power-domains = <&pd IMX_SC_R_LCD_0_PWM_0>;
 	};
 
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
index 49ad3413db94..7e510b21bbac 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
@@ -29,7 +29,7 @@ lsio_subsys: bus@5d000000 {
 			 <&pwm0_lpcg 1>;
 		assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>;
 		assigned-clock-rates = <24000000>;
-		#pwm-cells = <2>;
+		#pwm-cells = <3>;
 		interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
 		status = "disabled";
 	};
@@ -42,7 +42,7 @@ lsio_subsys: bus@5d000000 {
 			 <&pwm1_lpcg 1>;
 		assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>;
 		assigned-clock-rates = <24000000>;
-		#pwm-cells = <2>;
+		#pwm-cells = <3>;
 		interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
 		status = "disabled";
 	};
@@ -55,7 +55,7 @@ lsio_subsys: bus@5d000000 {
 			 <&pwm2_lpcg 1>;
 		assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>;
 		assigned-clock-rates = <24000000>;
-		#pwm-cells = <2>;
+		#pwm-cells = <3>;
 		interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
 		status = "disabled";
 	};
@@ -68,7 +68,7 @@ lsio_subsys: bus@5d000000 {
 			 <&pwm3_lpcg 1>;
 		assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>;
 		assigned-clock-rates = <24000000>;
-		#pwm-cells = <2>;
+		#pwm-cells = <3>;
 		interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
 		status = "disabled";
 	};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
index c9a610ba4836..1264da6012f9 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
@@ -2072,6 +2072,7 @@
 				phys = <&usb3_phy0>, <&usb3_phy0>;
 				phy-names = "usb2-phy", "usb3-phy";
 				snps,gfladj-refclk-lpm-sel-quirk;
+				snps,parkmode-disable-ss-quirk;
 			};
 
 		};
@@ -2114,6 +2115,7 @@
 				phys = <&usb3_phy1>, <&usb3_phy1>;
 				phy-names = "usb2-phy", "usb3-phy";
 				snps,gfladj-refclk-lpm-sel-quirk;
+				snps,parkmode-disable-ss-quirk;
 			};
 		};
 
diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 4b1ce9fc1758..c6dc3ba0d43b 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -1649,6 +1649,7 @@
 			phys = <&usb3_phy0>, <&usb3_phy0>;
 			phy-names = "usb2-phy", "usb3-phy";
 			power-domains = <&pgc_otg1>;
+			snps,parkmode-disable-ss-quirk;
 			status = "disabled";
 		};
 
@@ -1680,6 +1681,7 @@
 			phys = <&usb3_phy1>, <&usb3_phy1>;
 			phy-names = "usb2-phy", "usb3-phy";
 			power-domains = <&pgc_otg2>;
+			snps,parkmode-disable-ss-quirk;
 			status = "disabled";
 		};
 
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
index 01539df335f8..8439dd6b3935 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
@@ -96,6 +96,17 @@
 	status = "okay";
 };
 
+&edma3 {
+	power-domains = <&pd IMX_SC_R_DMA_1_CH0>,
+		     <&pd IMX_SC_R_DMA_1_CH1>,
+		     <&pd IMX_SC_R_DMA_1_CH2>,
+		     <&pd IMX_SC_R_DMA_1_CH3>,
+		     <&pd IMX_SC_R_DMA_1_CH4>,
+		     <&pd IMX_SC_R_DMA_1_CH5>,
+		     <&pd IMX_SC_R_DMA_1_CH6>,
+		     <&pd IMX_SC_R_DMA_1_CH7>;
+};
+
 &flexcan1 {
 	fsl,clk-source = /bits/ 8 <1>;
 };
diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi
index f22c1ac391c9..c4a0082f30d3 100644
--- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi
@@ -483,7 +483,7 @@
 			};
 		};
 
-		gpioe: gpio@2d000080 {
+		gpioe: gpio@2d000000 {
 				compatible = "fsl,imx8ulp-gpio";
 				reg = <0x2d000000 0x1000>;
 				gpio-controller;
@@ -498,7 +498,7 @@
 				gpio-ranges = <&iomuxc1 0 32 24>;
 		};
 
-		gpiof: gpio@2d010080 {
+		gpiof: gpio@2d010000 {
 				compatible = "fsl,imx8ulp-gpio";
 				reg = <0x2d010000 0x1000>;
 				gpio-controller;
@@ -534,7 +534,7 @@
 			};
 		};
 
-		gpiod: gpio@2e200080 {
+		gpiod: gpio@2e200000 {
 			compatible = "fsl,imx8ulp-gpio";
 			reg = <0x2e200000 0x1000>;
 			gpio-controller;
diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts
index f06139bdff97..3c5c67ebee5d 100644
--- a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts
+++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts
@@ -577,7 +577,7 @@
 		fsl,pins = <
 			MX93_PAD_UART2_TXD__LPUART2_TX		0x31e
 			MX93_PAD_UART2_RXD__LPUART2_RX		0x31e
-			MX93_PAD_SAI1_TXD0__LPUART2_RTS_B	0x31e
+			MX93_PAD_SAI1_TXD0__LPUART2_RTS_B	0x51e
 		>;
 	};
 
diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi
index ceccf4766440..34c0540276d1 100644
--- a/arch/arm64/boot/dts/freescale/imx93.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx93.dtsi
@@ -417,7 +417,7 @@
 					compatible = "fsl,imx93-src-slice";
 					reg = <0x44462400 0x400>, <0x44465800 0x400>;
 					#power-domain-cells = <0>;
-					clocks = <&clk IMX93_CLK_MEDIA_AXI>,
+					clocks = <&clk IMX93_CLK_NIC_MEDIA_GATE>,
 						 <&clk IMX93_CLK_MEDIA_APB>;
 				};
 			};
@@ -957,7 +957,7 @@
 			};
 		};
 
-		gpio2: gpio@43810080 {
+		gpio2: gpio@43810000 {
 			compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio";
 			reg = <0x43810000 0x1000>;
 			gpio-controller;
@@ -972,7 +972,7 @@
 			gpio-ranges = <&iomuxc 0 4 30>;
 		};
 
-		gpio3: gpio@43820080 {
+		gpio3: gpio@43820000 {
 			compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio";
 			reg = <0x43820000 0x1000>;
 			gpio-controller;
@@ -988,7 +988,7 @@
 				      <&iomuxc 26 34 2>, <&iomuxc 28 0 4>;
 		};
 
-		gpio4: gpio@43830080 {
+		gpio4: gpio@43830000 {
 			compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio";
 			reg = <0x43830000 0x1000>;
 			gpio-controller;
@@ -1003,7 +1003,7 @@
 			gpio-ranges = <&iomuxc 0 38 28>, <&iomuxc 28 36 2>;
 		};
 
-		gpio1: gpio@47400080 {
+		gpio1: gpio@47400000 {
 			compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio";
 			reg = <0x47400000 0x1000>;
 			gpio-controller;
diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
index 3b7a176b7904..c46682150e50 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
@@ -73,7 +73,7 @@
 		};
 	};
 
-	memory {
+	memory@40000000 {
 		reg = <0 0x40000000 0 0x40000000>;
 	};
 
diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
index a885a3fbe456..2dc1bdc74e21 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
@@ -55,7 +55,7 @@
 		};
 	};
 
-	memory {
+	memory@40000000 {
 		reg = <0 0x40000000 0 0x20000000>;
 	};
 
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
index af4a4309bda4..b876e501216b 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
@@ -126,6 +126,7 @@
 		compatible = "sff,sfp";
 		i2c-bus = <&i2c_sfp1>;
 		los-gpios = <&pio 46 GPIO_ACTIVE_HIGH>;
+		maximum-power-milliwatt = <3000>;
 		mod-def0-gpios = <&pio 49 GPIO_ACTIVE_LOW>;
 		tx-disable-gpios = <&pio 20 GPIO_ACTIVE_HIGH>;
 		tx-fault-gpios = <&pio 7 GPIO_ACTIVE_HIGH>;
@@ -137,6 +138,7 @@
 		i2c-bus = <&i2c_sfp2>;
 		los-gpios = <&pio 31 GPIO_ACTIVE_HIGH>;
 		mod-def0-gpios = <&pio 47 GPIO_ACTIVE_LOW>;
+		maximum-power-milliwatt = <3000>;
 		tx-disable-gpios = <&pio 15 GPIO_ACTIVE_HIGH>;
 		tx-fault-gpios = <&pio 48 GPIO_ACTIVE_HIGH>;
 	};
@@ -150,16 +152,16 @@
 			trip = <&cpu_trip_active_high>;
 		};
 
-		cpu-active-low {
+		cpu-active-med {
 			/* active: set fan to cooling level 1 */
 			cooling-device = <&fan 1 1>;
-			trip = <&cpu_trip_active_low>;
+			trip = <&cpu_trip_active_med>;
 		};
 
-		cpu-passive {
-			/* passive: set fan to cooling level 0 */
+		cpu-active-low {
+			/* active: set fan to cooling level 0 */
 			cooling-device = <&fan 0 0>;
-			trip = <&cpu_trip_passive>;
+			trip = <&cpu_trip_active_low>;
 		};
 	};
 };
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
index 24eda00e320d..fc751e049953 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
@@ -374,6 +374,10 @@
 			reg = <0 0x11230000 0 0x1000>,
 			      <0 0x11c20000 0 0x1000>;
 			interrupts = <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+			assigned-clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>,
+					  <&topckgen CLK_TOP_EMMC_250M_SEL>;
+			assigned-clock-parents = <&apmixedsys CLK_APMIXED_MPLL>,
+						 <&topckgen CLK_TOP_NET1PLL_D5_D2>;
 			clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>,
 				 <&infracfg CLK_INFRA_MSDC_HCK_CK>,
 				 <&infracfg CLK_INFRA_MSDC_CK>,
@@ -610,22 +614,34 @@
 			thermal-sensors = <&thermal 0>;
 
 			trips {
+				cpu_trip_crit: crit {
+					temperature = <125000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+
+				cpu_trip_hot: hot {
+					temperature = <120000>;
+					hysteresis = <2000>;
+					type = "hot";
+				};
+
 				cpu_trip_active_high: active-high {
 					temperature = <115000>;
 					hysteresis = <2000>;
 					type = "active";
 				};
 
-				cpu_trip_active_low: active-low {
+				cpu_trip_active_med: active-med {
 					temperature = <85000>;
 					hysteresis = <2000>;
 					type = "active";
 				};
 
-				cpu_trip_passive: passive {
-					temperature = <40000>;
+				cpu_trip_active_low: active-low {
+					temperature = <60000>;
 					hysteresis = <2000>;
-					type = "passive";
+					type = "active";
 				};
 			};
 		};
diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
index 5122963d8743..d258c80213b2 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
@@ -44,7 +44,7 @@
 		id-gpio = <&pio 16 GPIO_ACTIVE_HIGH>;
 	};
 
-	usb_p1_vbus: regulator@0 {
+	usb_p1_vbus: regulator-usb-p1 {
 		compatible = "regulator-fixed";
 		regulator-name = "usb_vbus";
 		regulator-min-microvolt = <5000000>;
@@ -53,7 +53,7 @@
 		enable-active-high;
 	};
 
-	usb_p0_vbus: regulator@1 {
+	usb_p0_vbus: regulator-usb-p0 {
 		compatible = "regulator-fixed";
 		regulator-name = "vbus";
 		regulator-min-microvolt = <5000000>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts
index ce336a48c897..77f9ab94c00b 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts
@@ -31,14 +31,14 @@
 		#address-cells = <2>;
 		#size-cells = <2>;
 		ranges;
-		scp_mem_reserved: scp_mem_region {
+		scp_mem_reserved: memory@50000000 {
 			compatible = "shared-dma-pool";
 			reg = <0 0x50000000 0 0x2900000>;
 			no-map;
 		};
 	};
 
-	ntc@0 {
+	thermal-sensor {
 		compatible = "murata,ncp03wf104";
 		pullup-uv = <1800000>;
 		pullup-ohm = <390000>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
index bf97b60ae4d1..820260348de9 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi
@@ -91,6 +91,8 @@
 
 &dsi0 {
 	status = "okay";
+	/delete-property/#size-cells;
+	/delete-property/#address-cells;
 	/delete-node/panel@0;
 	ports {
 		port {
@@ -441,20 +443,20 @@
 	};
 
 	touchscreen_pins: touchscreen-pins {
-		touch_int_odl {
+		touch-int-odl {
 			pinmux = <PINMUX_GPIO155__FUNC_GPIO155>;
 			input-enable;
 			bias-pull-up;
 		};
 
-		touch_rst_l {
+		touch-rst-l {
 			pinmux = <PINMUX_GPIO156__FUNC_GPIO156>;
 			output-high;
 		};
 	};
 
 	trackpad_pins: trackpad-pins {
-		trackpad_int {
+		trackpad-int {
 			pinmux = <PINMUX_GPIO7__FUNC_GPIO7>;
 			input-enable;
 			bias-disable; /* pulled externally */
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
index bf7de35ffcbc..7881a27be029 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
@@ -116,7 +116,7 @@
 		#size-cells = <2>;
 		ranges;
 
-		scp_mem_reserved: scp_mem_region {
+		scp_mem_reserved: memory@50000000 {
 			compatible = "shared-dma-pool";
 			reg = <0 0x50000000 0 0x2900000>;
 			no-map;
@@ -460,7 +460,7 @@
 
 &pio {
 	aud_pins_default: audiopins {
-		pins_bus {
+		pins-bus {
 			pinmux = <PINMUX_GPIO97__FUNC_I2S2_MCK>,
 				<PINMUX_GPIO98__FUNC_I2S2_BCK>,
 				<PINMUX_GPIO101__FUNC_I2S2_LRCK>,
@@ -482,7 +482,7 @@
 	};
 
 	aud_pins_tdm_out_on: audiotdmouton {
-		pins_bus {
+		pins-bus {
 			pinmux = <PINMUX_GPIO169__FUNC_TDM_BCK_2ND>,
 				<PINMUX_GPIO170__FUNC_TDM_LRCK_2ND>,
 				<PINMUX_GPIO171__FUNC_TDM_DATA0_2ND>,
@@ -494,7 +494,7 @@
 	};
 
 	aud_pins_tdm_out_off: audiotdmoutoff {
-		pins_bus {
+		pins-bus {
 			pinmux = <PINMUX_GPIO169__FUNC_GPIO169>,
 				<PINMUX_GPIO170__FUNC_GPIO170>,
 				<PINMUX_GPIO171__FUNC_GPIO171>,
@@ -508,13 +508,13 @@
 	};
 
 	bt_pins: bt-pins {
-		pins_bt_en {
+		pins-bt-en {
 			pinmux = <PINMUX_GPIO120__FUNC_GPIO120>;
 			output-low;
 		};
 	};
 
-	ec_ap_int_odl: ec_ap_int_odl {
+	ec_ap_int_odl: ec-ap-int-odl {
 		pins1 {
 			pinmux = <PINMUX_GPIO151__FUNC_GPIO151>;
 			input-enable;
@@ -522,7 +522,7 @@
 		};
 	};
 
-	h1_int_od_l: h1_int_od_l {
+	h1_int_od_l: h1-int-od-l {
 		pins1 {
 			pinmux = <PINMUX_GPIO153__FUNC_GPIO153>;
 			input-enable;
@@ -530,7 +530,7 @@
 	};
 
 	i2c0_pins: i2c0 {
-		pins_bus {
+		pins-bus {
 			pinmux = <PINMUX_GPIO82__FUNC_SDA0>,
 				 <PINMUX_GPIO83__FUNC_SCL0>;
 			mediatek,pull-up-adv = <3>;
@@ -539,7 +539,7 @@
 	};
 
 	i2c1_pins: i2c1 {
-		pins_bus {
+		pins-bus {
 			pinmux = <PINMUX_GPIO81__FUNC_SDA1>,
 				 <PINMUX_GPIO84__FUNC_SCL1>;
 			mediatek,pull-up-adv = <3>;
@@ -548,7 +548,7 @@
 	};
 
 	i2c2_pins: i2c2 {
-		pins_bus {
+		pins-bus {
 			pinmux = <PINMUX_GPIO103__FUNC_SCL2>,
 				 <PINMUX_GPIO104__FUNC_SDA2>;
 			bias-disable;
@@ -557,7 +557,7 @@
 	};
 
 	i2c3_pins: i2c3 {
-		pins_bus {
+		pins-bus {
 			pinmux = <PINMUX_GPIO50__FUNC_SCL3>,
 				 <PINMUX_GPIO51__FUNC_SDA3>;
 			mediatek,pull-up-adv = <3>;
@@ -566,7 +566,7 @@
 	};
 
 	i2c4_pins: i2c4 {
-		pins_bus {
+		pins-bus {
 			pinmux = <PINMUX_GPIO105__FUNC_SCL4>,
 				 <PINMUX_GPIO106__FUNC_SDA4>;
 			bias-disable;
@@ -575,7 +575,7 @@
 	};
 
 	i2c5_pins: i2c5 {
-		pins_bus {
+		pins-bus {
 			pinmux = <PINMUX_GPIO48__FUNC_SCL5>,
 				 <PINMUX_GPIO49__FUNC_SDA5>;
 			mediatek,pull-up-adv = <3>;
@@ -584,7 +584,7 @@
 	};
 
 	i2c6_pins: i2c6 {
-		pins_bus {
+		pins-bus {
 			pinmux = <PINMUX_GPIO11__FUNC_SCL6>,
 				 <PINMUX_GPIO12__FUNC_SDA6>;
 			bias-disable;
@@ -592,7 +592,7 @@
 	};
 
 	mmc0_pins_default: mmc0-pins-default {
-		pins_cmd_dat {
+		pins-cmd-dat {
 			pinmux = <PINMUX_GPIO123__FUNC_MSDC0_DAT0>,
 				 <PINMUX_GPIO128__FUNC_MSDC0_DAT1>,
 				 <PINMUX_GPIO125__FUNC_MSDC0_DAT2>,
@@ -607,13 +607,13 @@
 			mediatek,pull-up-adv = <01>;
 		};
 
-		pins_clk {
+		pins-clk {
 			pinmux = <PINMUX_GPIO124__FUNC_MSDC0_CLK>;
 			drive-strength = <MTK_DRIVE_14mA>;
 			mediatek,pull-down-adv = <10>;
 		};
 
-		pins_rst {
+		pins-rst {
 			pinmux = <PINMUX_GPIO133__FUNC_MSDC0_RSTB>;
 			drive-strength = <MTK_DRIVE_14mA>;
 			mediatek,pull-down-adv = <01>;
@@ -621,7 +621,7 @@
 	};
 
 	mmc0_pins_uhs: mmc0-pins-uhs {
-		pins_cmd_dat {
+		pins-cmd-dat {
 			pinmux = <PINMUX_GPIO123__FUNC_MSDC0_DAT0>,
 				 <PINMUX_GPIO128__FUNC_MSDC0_DAT1>,
 				 <PINMUX_GPIO125__FUNC_MSDC0_DAT2>,
@@ -636,19 +636,19 @@
 			mediatek,pull-up-adv = <01>;
 		};
 
-		pins_clk {
+		pins-clk {
 			pinmux = <PINMUX_GPIO124__FUNC_MSDC0_CLK>;
 			drive-strength = <MTK_DRIVE_14mA>;
 			mediatek,pull-down-adv = <10>;
 		};
 
-		pins_ds {
+		pins-ds {
 			pinmux = <PINMUX_GPIO131__FUNC_MSDC0_DSL>;
 			drive-strength = <MTK_DRIVE_14mA>;
 			mediatek,pull-down-adv = <10>;
 		};
 
-		pins_rst {
+		pins-rst {
 			pinmux = <PINMUX_GPIO133__FUNC_MSDC0_RSTB>;
 			drive-strength = <MTK_DRIVE_14mA>;
 			mediatek,pull-up-adv = <01>;
@@ -656,7 +656,7 @@
 	};
 
 	mmc1_pins_default: mmc1-pins-default {
-		pins_cmd_dat {
+		pins-cmd-dat {
 			pinmux = <PINMUX_GPIO31__FUNC_MSDC1_CMD>,
 				 <PINMUX_GPIO32__FUNC_MSDC1_DAT0>,
 				 <PINMUX_GPIO34__FUNC_MSDC1_DAT1>,
@@ -666,7 +666,7 @@
 			mediatek,pull-up-adv = <10>;
 		};
 
-		pins_clk {
+		pins-clk {
 			pinmux = <PINMUX_GPIO29__FUNC_MSDC1_CLK>;
 			input-enable;
 			mediatek,pull-down-adv = <10>;
@@ -674,7 +674,7 @@
 	};
 
 	mmc1_pins_uhs: mmc1-pins-uhs {
-		pins_cmd_dat {
+		pins-cmd-dat {
 			pinmux = <PINMUX_GPIO31__FUNC_MSDC1_CMD>,
 				 <PINMUX_GPIO32__FUNC_MSDC1_DAT0>,
 				 <PINMUX_GPIO34__FUNC_MSDC1_DAT1>,
@@ -685,7 +685,7 @@
 			mediatek,pull-up-adv = <10>;
 		};
 
-		pins_clk {
+		pins-clk {
 			pinmux = <PINMUX_GPIO29__FUNC_MSDC1_CLK>;
 			drive-strength = <MTK_DRIVE_8mA>;
 			mediatek,pull-down-adv = <10>;
@@ -693,15 +693,15 @@
 		};
 	};
 
-	panel_pins_default: panel_pins_default {
-		panel_reset {
+	panel_pins_default: panel-pins-default {
+		panel-reset {
 			pinmux = <PINMUX_GPIO45__FUNC_GPIO45>;
 			output-low;
 			bias-pull-up;
 		};
 	};
 
-	pwm0_pin_default: pwm0_pin_default {
+	pwm0_pin_default: pwm0-pin-default {
 		pins1 {
 			pinmux = <PINMUX_GPIO176__FUNC_GPIO176>;
 			output-high;
@@ -713,14 +713,14 @@
 	};
 
 	scp_pins: scp {
-		pins_scp_uart {
+		pins-scp-uart {
 			pinmux = <PINMUX_GPIO110__FUNC_TP_URXD1_AO>,
 				 <PINMUX_GPIO112__FUNC_TP_UTXD1_AO>;
 		};
 	};
 
 	spi0_pins: spi0 {
-		pins_spi {
+		pins-spi {
 			pinmux = <PINMUX_GPIO85__FUNC_SPI0_MI>,
 				 <PINMUX_GPIO86__FUNC_GPIO86>,
 				 <PINMUX_GPIO87__FUNC_SPI0_MO>,
@@ -730,7 +730,7 @@
 	};
 
 	spi1_pins: spi1 {
-		pins_spi {
+		pins-spi {
 			pinmux = <PINMUX_GPIO161__FUNC_SPI1_A_MI>,
 				 <PINMUX_GPIO162__FUNC_SPI1_A_CSB>,
 				 <PINMUX_GPIO163__FUNC_SPI1_A_MO>,
@@ -740,20 +740,20 @@
 	};
 
 	spi2_pins: spi2 {
-		pins_spi {
+		pins-spi {
 			pinmux = <PINMUX_GPIO0__FUNC_SPI2_CSB>,
 				 <PINMUX_GPIO1__FUNC_SPI2_MO>,
 				 <PINMUX_GPIO2__FUNC_SPI2_CLK>;
 			bias-disable;
 		};
-		pins_spi_mi {
+		pins-spi-mi {
 			pinmux = <PINMUX_GPIO94__FUNC_SPI2_MI>;
 			mediatek,pull-down-adv = <00>;
 		};
 	};
 
 	spi3_pins: spi3 {
-		pins_spi {
+		pins-spi {
 			pinmux = <PINMUX_GPIO21__FUNC_SPI3_MI>,
 				 <PINMUX_GPIO22__FUNC_SPI3_CSB>,
 				 <PINMUX_GPIO23__FUNC_SPI3_MO>,
@@ -763,7 +763,7 @@
 	};
 
 	spi4_pins: spi4 {
-		pins_spi {
+		pins-spi {
 			pinmux = <PINMUX_GPIO17__FUNC_SPI4_MI>,
 				 <PINMUX_GPIO18__FUNC_SPI4_CSB>,
 				 <PINMUX_GPIO19__FUNC_SPI4_MO>,
@@ -773,7 +773,7 @@
 	};
 
 	spi5_pins: spi5 {
-		pins_spi {
+		pins-spi {
 			pinmux = <PINMUX_GPIO13__FUNC_SPI5_MI>,
 				 <PINMUX_GPIO14__FUNC_SPI5_CSB>,
 				 <PINMUX_GPIO15__FUNC_SPI5_MO>,
@@ -783,63 +783,63 @@
 	};
 
 	uart0_pins_default: uart0-pins-default {
-		pins_rx {
+		pins-rx {
 			pinmux = <PINMUX_GPIO95__FUNC_URXD0>;
 			input-enable;
 			bias-pull-up;
 		};
-		pins_tx {
+		pins-tx {
 			pinmux = <PINMUX_GPIO96__FUNC_UTXD0>;
 		};
 	};
 
 	uart1_pins_default: uart1-pins-default {
-		pins_rx {
+		pins-rx {
 			pinmux = <PINMUX_GPIO121__FUNC_URXD1>;
 			input-enable;
 			bias-pull-up;
 		};
-		pins_tx {
+		pins-tx {
 			pinmux = <PINMUX_GPIO115__FUNC_UTXD1>;
 		};
-		pins_rts {
+		pins-rts {
 			pinmux = <PINMUX_GPIO47__FUNC_URTS1>;
 			output-enable;
 		};
-		pins_cts {
+		pins-cts {
 			pinmux = <PINMUX_GPIO46__FUNC_UCTS1>;
 			input-enable;
 		};
 	};
 
 	uart1_pins_sleep: uart1-pins-sleep {
-		pins_rx {
+		pins-rx {
 			pinmux = <PINMUX_GPIO121__FUNC_GPIO121>;
 			input-enable;
 			bias-pull-up;
 		};
-		pins_tx {
+		pins-tx {
 			pinmux = <PINMUX_GPIO115__FUNC_UTXD1>;
 		};
-		pins_rts {
+		pins-rts {
 			pinmux = <PINMUX_GPIO47__FUNC_URTS1>;
 			output-enable;
 		};
-		pins_cts {
+		pins-cts {
 			pinmux = <PINMUX_GPIO46__FUNC_UCTS1>;
 			input-enable;
 		};
 	};
 
 	wifi_pins_pwrseq: wifi-pins-pwrseq {
-		pins_wifi_enable {
+		pins-wifi-enable {
 			pinmux = <PINMUX_GPIO119__FUNC_GPIO119>;
 			output-low;
 		};
 	};
 
 	wifi_pins_wakeup: wifi-pins-wakeup {
-		pins_wifi_wakeup {
+		pins-wifi-wakeup {
 			pinmux = <PINMUX_GPIO113__FUNC_GPIO113>;
 			input-enable;
 		};
diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
index 5169779d01df..976dc968b3ca 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
@@ -1210,127 +1210,6 @@
 			nvmem-cell-names = "calibration-data";
 		};
 
-		thermal_zones: thermal-zones {
-			cpu_thermal: cpu-thermal {
-				polling-delay-passive = <100>;
-				polling-delay = <500>;
-				thermal-sensors = <&thermal 0>;
-				sustainable-power = <5000>;
-
-				trips {
-					threshold: trip-point0 {
-						temperature = <68000>;
-						hysteresis = <2000>;
-						type = "passive";
-					};
-
-					target: trip-point1 {
-						temperature = <80000>;
-						hysteresis = <2000>;
-						type = "passive";
-					};
-
-					cpu_crit: cpu-crit {
-						temperature = <115000>;
-						hysteresis = <2000>;
-						type = "critical";
-					};
-				};
-
-				cooling-maps {
-					map0 {
-						trip = <&target>;
-						cooling-device = <&cpu0
-							THERMAL_NO_LIMIT
-							THERMAL_NO_LIMIT>,
-								 <&cpu1
-							THERMAL_NO_LIMIT
-							THERMAL_NO_LIMIT>,
-								 <&cpu2
-							THERMAL_NO_LIMIT
-							THERMAL_NO_LIMIT>,
-								 <&cpu3
-							THERMAL_NO_LIMIT
-							THERMAL_NO_LIMIT>;
-						contribution = <3072>;
-					};
-					map1 {
-						trip = <&target>;
-						cooling-device = <&cpu4
-							THERMAL_NO_LIMIT
-							THERMAL_NO_LIMIT>,
-								 <&cpu5
-							THERMAL_NO_LIMIT
-							THERMAL_NO_LIMIT>,
-								 <&cpu6
-							THERMAL_NO_LIMIT
-							THERMAL_NO_LIMIT>,
-								 <&cpu7
-							THERMAL_NO_LIMIT
-							THERMAL_NO_LIMIT>;
-						contribution = <1024>;
-					};
-				};
-			};
-
-			/* The tzts1 ~ tzts6 don't need to polling */
-			/* The tzts1 ~ tzts6 don't need to thermal throttle */
-
-			tzts1: tzts1 {
-				polling-delay-passive = <0>;
-				polling-delay = <0>;
-				thermal-sensors = <&thermal 1>;
-				sustainable-power = <5000>;
-				trips {};
-				cooling-maps {};
-			};
-
-			tzts2: tzts2 {
-				polling-delay-passive = <0>;
-				polling-delay = <0>;
-				thermal-sensors = <&thermal 2>;
-				sustainable-power = <5000>;
-				trips {};
-				cooling-maps {};
-			};
-
-			tzts3: tzts3 {
-				polling-delay-passive = <0>;
-				polling-delay = <0>;
-				thermal-sensors = <&thermal 3>;
-				sustainable-power = <5000>;
-				trips {};
-				cooling-maps {};
-			};
-
-			tzts4: tzts4 {
-				polling-delay-passive = <0>;
-				polling-delay = <0>;
-				thermal-sensors = <&thermal 4>;
-				sustainable-power = <5000>;
-				trips {};
-				cooling-maps {};
-			};
-
-			tzts5: tzts5 {
-				polling-delay-passive = <0>;
-				polling-delay = <0>;
-				thermal-sensors = <&thermal 5>;
-				sustainable-power = <5000>;
-				trips {};
-				cooling-maps {};
-			};
-
-			tztsABB: tztsABB {
-				polling-delay-passive = <0>;
-				polling-delay = <0>;
-				thermal-sensors = <&thermal 6>;
-				sustainable-power = <5000>;
-				trips {};
-				cooling-maps {};
-			};
-		};
-
 		pwm0: pwm@1100e000 {
 			compatible = "mediatek,mt8183-disp-pwm";
 			reg = <0 0x1100e000 0 0x1000>;
@@ -2105,4 +1984,125 @@
 			power-domains = <&spm MT8183_POWER_DOMAIN_CAM>;
 		};
 	};
+
+	thermal_zones: thermal-zones {
+		cpu_thermal: cpu-thermal {
+			polling-delay-passive = <100>;
+			polling-delay = <500>;
+			thermal-sensors = <&thermal 0>;
+			sustainable-power = <5000>;
+
+			trips {
+				threshold: trip-point0 {
+					temperature = <68000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+
+				target: trip-point1 {
+					temperature = <80000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+
+				cpu_crit: cpu-crit {
+					temperature = <115000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				map0 {
+					trip = <&target>;
+					cooling-device = <&cpu0
+						THERMAL_NO_LIMIT
+						THERMAL_NO_LIMIT>,
+							 <&cpu1
+						THERMAL_NO_LIMIT
+						THERMAL_NO_LIMIT>,
+							 <&cpu2
+						THERMAL_NO_LIMIT
+						THERMAL_NO_LIMIT>,
+							 <&cpu3
+						THERMAL_NO_LIMIT
+						THERMAL_NO_LIMIT>;
+					contribution = <3072>;
+				};
+				map1 {
+					trip = <&target>;
+					cooling-device = <&cpu4
+						THERMAL_NO_LIMIT
+						THERMAL_NO_LIMIT>,
+							 <&cpu5
+						THERMAL_NO_LIMIT
+						THERMAL_NO_LIMIT>,
+							 <&cpu6
+						THERMAL_NO_LIMIT
+						THERMAL_NO_LIMIT>,
+							 <&cpu7
+						THERMAL_NO_LIMIT
+						THERMAL_NO_LIMIT>;
+					contribution = <1024>;
+				};
+			};
+		};
+
+		/* The tzts1 ~ tzts6 don't need to polling */
+		/* The tzts1 ~ tzts6 don't need to thermal throttle */
+
+		tzts1: tzts1 {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&thermal 1>;
+			sustainable-power = <5000>;
+			trips {};
+			cooling-maps {};
+		};
+
+		tzts2: tzts2 {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&thermal 2>;
+			sustainable-power = <5000>;
+			trips {};
+			cooling-maps {};
+		};
+
+		tzts3: tzts3 {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&thermal 3>;
+			sustainable-power = <5000>;
+			trips {};
+			cooling-maps {};
+		};
+
+		tzts4: tzts4 {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&thermal 4>;
+			sustainable-power = <5000>;
+			trips {};
+			cooling-maps {};
+		};
+
+		tzts5: tzts5 {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&thermal 5>;
+			sustainable-power = <5000>;
+			trips {};
+			cooling-maps {};
+		};
+
+		tztsABB: tztsABB {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&thermal 6>;
+			sustainable-power = <5000>;
+			trips {};
+			cooling-maps {};
+		};
+	};
 };
diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi
index f04ae70c470a..df0c04f2ba1d 100644
--- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi
@@ -924,7 +924,8 @@
 					reg = <MT8186_POWER_DOMAIN_CSIRX_TOP>;
 					clocks = <&topckgen CLK_TOP_SENINF>,
 						 <&topckgen CLK_TOP_SENINF1>;
-					clock-names = "csirx_top0", "csirx_top1";
+					clock-names = "subsys-csirx-top0",
+						      "subsys-csirx-top1";
 					#power-domain-cells = <0>;
 				};
 
@@ -942,7 +943,8 @@
 					reg = <MT8186_POWER_DOMAIN_ADSP_AO>;
 					clocks = <&topckgen CLK_TOP_AUDIODSP>,
 						 <&topckgen CLK_TOP_ADSP_BUS>;
-					clock-names = "audioadsp", "adsp_bus";
+					clock-names = "audioadsp",
+						      "subsys-adsp-bus";
 					#address-cells = <1>;
 					#size-cells = <0>;
 					#power-domain-cells = <1>;
@@ -975,8 +977,11 @@
 						 <&mmsys CLK_MM_SMI_COMMON>,
 						 <&mmsys CLK_MM_SMI_GALS>,
 						 <&mmsys CLK_MM_SMI_IOMMU>;
-					clock-names = "disp", "mdp", "smi_infra", "smi_common",
-						     "smi_gals", "smi_iommu";
+					clock-names = "disp", "mdp",
+						      "subsys-smi-infra",
+						      "subsys-smi-common",
+						      "subsys-smi-gals",
+						      "subsys-smi-iommu";
 					mediatek,infracfg = <&infracfg_ao>;
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -993,15 +998,17 @@
 
 					power-domain@MT8186_POWER_DOMAIN_CAM {
 						reg = <MT8186_POWER_DOMAIN_CAM>;
-						clocks = <&topckgen CLK_TOP_CAM>,
-							 <&topckgen CLK_TOP_SENINF>,
+						clocks = <&topckgen CLK_TOP_SENINF>,
 							 <&topckgen CLK_TOP_SENINF1>,
 							 <&topckgen CLK_TOP_SENINF2>,
 							 <&topckgen CLK_TOP_SENINF3>,
+							 <&camsys CLK_CAM2MM_GALS>,
 							 <&topckgen CLK_TOP_CAMTM>,
-							 <&camsys CLK_CAM2MM_GALS>;
-						clock-names = "cam-top", "cam0", "cam1", "cam2",
-							     "cam3", "cam-tm", "gals";
+							 <&topckgen CLK_TOP_CAM>;
+						clock-names = "cam0", "cam1", "cam2",
+							      "cam3", "gals",
+							      "subsys-cam-tm",
+							      "subsys-cam-top";
 						mediatek,infracfg = <&infracfg_ao>;
 						#address-cells = <1>;
 						#size-cells = <0>;
@@ -1020,9 +1027,9 @@
 
 					power-domain@MT8186_POWER_DOMAIN_IMG {
 						reg = <MT8186_POWER_DOMAIN_IMG>;
-						clocks = <&topckgen CLK_TOP_IMG1>,
-							 <&imgsys1 CLK_IMG1_GALS_IMG1>;
-						clock-names = "img-top", "gals";
+						clocks = <&imgsys1 CLK_IMG1_GALS_IMG1>,
+							 <&topckgen CLK_TOP_IMG1>;
+						clock-names = "gals", "subsys-img-top";
 						mediatek,infracfg = <&infracfg_ao>;
 						#address-cells = <1>;
 						#size-cells = <0>;
@@ -1041,8 +1048,11 @@
 							 <&ipesys CLK_IPE_LARB20>,
 							 <&ipesys CLK_IPE_SMI_SUBCOM>,
 							 <&ipesys CLK_IPE_GALS_IPE>;
-						clock-names = "ipe-top", "ipe-larb0", "ipe-larb1",
-							      "ipe-smi", "ipe-gals";
+						clock-names = "subsys-ipe-top",
+							      "subsys-ipe-larb0",
+							      "subsys-ipe-larb1",
+							      "subsys-ipe-smi",
+							      "subsys-ipe-gals";
 						mediatek,infracfg = <&infracfg_ao>;
 						#power-domain-cells = <0>;
 					};
@@ -1061,7 +1071,9 @@
 						clocks = <&topckgen CLK_TOP_WPE>,
 							 <&wpesys CLK_WPE_SMI_LARB8_CK_EN>,
 							 <&wpesys CLK_WPE_SMI_LARB8_PCLK_EN>;
-						clock-names = "wpe0", "larb-ck", "larb-pclk";
+						clock-names = "wpe0",
+							      "subsys-larb-ck",
+							      "subsys-larb-pclk";
 						mediatek,infracfg = <&infracfg_ao>;
 						#power-domain-cells = <0>;
 					};
@@ -1656,7 +1668,7 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 
-			gpu_speedbin: gpu-speed-bin@59c {
+			gpu_speedbin: gpu-speedbin@59c {
 				reg = <0x59c 0x4>;
 				bits = <0 3>;
 			};
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
index dd5b89b73190..5a7cab489ff3 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
@@ -389,7 +389,7 @@
 	pinctrl-0 = <&i2c7_pins>;
 
 	pmic@34 {
-		#interrupt-cells = <1>;
+		#interrupt-cells = <2>;
 		compatible = "mediatek,mt6360";
 		reg = <0x34>;
 		interrupt-controller;
diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
index 54c674c45b49..e0ac2e9f5b72 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
@@ -627,6 +627,8 @@
 
 					power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 {
 						reg = <MT8195_POWER_DOMAIN_VENC_CORE1>;
+						clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>;
+						clock-names = "venc1-larb";
 						mediatek,infracfg = <&infracfg_ao>;
 						#power-domain-cells = <0>;
 					};
@@ -689,6 +691,8 @@
 
 						power-domain@MT8195_POWER_DOMAIN_VENC {
 							reg = <MT8195_POWER_DOMAIN_VENC>;
+							clocks = <&vencsys CLK_VENC_LARB>;
+							clock-names = "venc0-larb";
 							mediatek,infracfg = <&infracfg_ao>;
 							#power-domain-cells = <0>;
 						};
@@ -2665,7 +2669,7 @@
 			reg = <0 0x1b010000 0 0x1000>;
 			mediatek,larb-id = <20>;
 			mediatek,smi = <&smi_common_vpp>;
-			clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>,
+			clocks = <&vencsys_core1 CLK_VENC_CORE1_VENC>,
 				 <&vencsys_core1 CLK_VENC_CORE1_GALS>,
 				 <&vppsys0 CLK_VPP0_GALS_VDO0_VDO1_VENCSYS_CORE1>;
 			clock-names = "apb", "smi", "gals";
diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts
index de0a1f2af983..7d4c5324c61b 100644
--- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts
+++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts
@@ -86,7 +86,7 @@
 	sgtl5000_clk: sgtl5000-oscillator {
 		compatible = "fixed-clock";
 		#clock-cells = <0>;
-		clock-frequency  = <24576000>;
+		clock-frequency = <24576000>;
 	};
 
 	dc_12v: dc-12v-regulator {
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index e729e7a22b23..cc8209795c3e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -668,7 +668,7 @@
 
 	vdec: video-codec@ff360000 {
 		compatible = "rockchip,rk3328-vdec", "rockchip,rk3399-vdec";
-		reg = <0x0 0xff360000 0x0 0x400>;
+		reg = <0x0 0xff360000 0x0 0x480>;
 		interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
 		clocks = <&cru ACLK_RKVDEC>, <&cru HCLK_RKVDEC>,
 			 <&cru SCLK_VDEC_CABAC>, <&cru SCLK_VDEC_CORE>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
index 5c1929d41cc0..cacbad35cfc8 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
@@ -509,8 +509,7 @@ ap_i2c_tp: &i2c5 {
 &pci_rootport {
 	mvl_wifi: wifi@0,0 {
 		compatible = "pci1b4b,2b42";
-		reg = <0x83010000 0x0 0x00000000 0x0 0x00100000
-		       0x83010000 0x0 0x00100000 0x0 0x00100000>;
+		reg = <0x0000 0x0 0x0 0x0 0x0>;
 		interrupt-parent = <&gpio0>;
 		interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
 		pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts
index 853e88455e75..9e4b12ed62cb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts
@@ -34,8 +34,8 @@
 &pci_rootport {
 	wifi@0,0 {
 		compatible = "qcom,ath10k";
-		reg = <0x00010000 0x0 0x00000000 0x0 0x00000000>,
-		      <0x03010010 0x0 0x00000000 0x0 0x00200000>;
+		reg = <0x00000000 0x0 0x00000000 0x0 0x00000000>,
+		      <0x03000010 0x0 0x00000000 0x0 0x00200000>;
 		qcom,ath10k-calibration-variant = "GO_DUMO";
 	};
 };
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
index c9bf1d5c3a42..789fd0dcc88b 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
@@ -489,6 +489,7 @@ ap_i2c_audio: &i2c8 {
 		#address-cells = <3>;
 		#size-cells = <2>;
 		ranges;
+		device_type = "pci";
 	};
 };
 
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index faf02e59d6c7..da0dfb237f85 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -1109,7 +1109,9 @@
 			power-domain@RK3399_PD_VDU {
 				reg = <RK3399_PD_VDU>;
 				clocks = <&cru ACLK_VDU>,
-					 <&cru HCLK_VDU>;
+					 <&cru HCLK_VDU>,
+					 <&cru SCLK_VDU_CA>,
+					 <&cru SCLK_VDU_CORE>;
 				pm_qos = <&qos_video_m1_r>,
 					 <&qos_video_m1_w>;
 				#power-domain-cells = <0>;
@@ -1384,7 +1386,7 @@
 
 	vdec: video-codec@ff660000 {
 		compatible = "rockchip,rk3399-vdec";
-		reg = <0x0 0xff660000 0x0 0x400>;
+		reg = <0x0 0xff660000 0x0 0x480>;
 		interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH 0>;
 		clocks = <&cru ACLK_VDU>, <&cru HCLK_VDU>,
 			 <&cru SCLK_VDU_CA>, <&cru SCLK_VDU_CORE>;
diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
index 0964761e3ce9..c19c0f1b3778 100644
--- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
@@ -977,7 +977,7 @@
 			     <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>;
-		interrupt-names = "sys", "pmc", "msi", "legacy", "err";
+		interrupt-names = "sys", "pmc", "msg", "legacy", "err";
 		bus-range = <0x0 0xf>;
 		clocks = <&cru ACLK_PCIE20_MST>, <&cru ACLK_PCIE20_SLV>,
 			 <&cru ACLK_PCIE20_DBI>, <&cru PCLK_PCIE20>,
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
index 9570b34aca2e..d88c0e852356 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi
@@ -235,13 +235,13 @@
 &pinctrl {
 	fan {
 		fan_int: fan-int {
-			rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_none>;
+			rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_up>;
 		};
 	};
 
 	hym8563 {
 		hym8563_int: hym8563-int {
-			rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>;
+			rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_up>;
 		};
 	};
 
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts
index 8f399c4317bd..e3a839a12dc6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts
@@ -38,7 +38,7 @@
 	leds {
 		compatible = "gpio-leds";
 		pinctrl-names = "default";
-		pinctrl-0 =<&leds_gpio>;
+		pinctrl-0 = <&leds_gpio>;
 
 		led-1 {
 			gpios = <&gpio1 RK_PA2 GPIO_ACTIVE_HIGH>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi
index 63151d9d2377..30db12c4fc82 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi
@@ -369,7 +369,7 @@
 		emmc_data_strobe: emmc-data-strobe {
 			rockchip,pins =
 				/* emmc_data_strobe */
-				<2 RK_PA2 1 &pcfg_pull_none>;
+				<2 RK_PA2 1 &pcfg_pull_down>;
 		};
 	};
 
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 7064c0e9179f..8aa0499f9b03 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -1362,7 +1362,6 @@
 			     <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH 0>,
 			     <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH 0>,
 			     <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>;
-		interrupt-names = "ch0", "ch1", "ch2", "ch3";
 		rockchip,pmu = <&pmu1grf>;
 	};
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 646591c67e7a..91d2d6714969 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1839,6 +1839,10 @@ static int __init __kpti_install_ng_mappings(void *__unused)
 
 static void __init kpti_install_ng_mappings(void)
 {
+	/* Check whether KPTI is going to be used */
+	if (!cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0))
+		return;
+
 	/*
 	 * We don't need to rewrite the page-tables if either we've done
 	 * it already or we have KASLR enabled and therefore have not
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 339a55194b2c..74a67ad87f29 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -436,6 +436,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
 	if (ret)
 		goto out;
 
+	/* Silently exit if the vLPI is already mapped */
+	if (irq->hw)
+		goto out;
+
 	/*
 	 * Emit the mapping request. If it fails, the ITS probably
 	 * isn't v4 compatible, so let's silently bail out. Holding
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 204b94b2e6aa..4ba8d67ddb09 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -83,7 +83,7 @@ endif
 
 ifeq ($(CONFIG_RELOCATABLE),y)
 KBUILD_CFLAGS_KERNEL		+= -fPIE
-LDFLAGS_vmlinux			+= -static -pie --no-dynamic-linker -z notext
+LDFLAGS_vmlinux			+= -static -pie --no-dynamic-linker -z notext $(call ld-option, --apply-dynamic-relocs)
 endif
 
 cflags-y += $(call cc-option, -mno-check-zero-division)
diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index b9a4ab54285c..9b16a3b8e706 100644
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -293,7 +293,7 @@ extern const char *__elf_platform;
 #define ELF_PLAT_INIT(_r, load_addr)	do { \
 	_r->regs[1] = _r->regs[2] = _r->regs[3] = _r->regs[4] = 0;	\
 	_r->regs[5] = _r->regs[6] = _r->regs[7] = _r->regs[8] = 0;	\
-	_r->regs[9] = _r->regs[10] = _r->regs[11] = _r->regs[12] = 0;	\
+	_r->regs[9] = _r->regs[10] /* syscall n */ = _r->regs[12] = 0;	\
 	_r->regs[13] = _r->regs[14] = _r->regs[15] = _r->regs[16] = 0;	\
 	_r->regs[17] = _r->regs[18] = _r->regs[19] = _r->regs[20] = 0;	\
 	_r->regs[21] = _r->regs[22] = _r->regs[23] = _r->regs[24] = 0;	\
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 9b4957cefa8a..46366e783c84 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -1098,12 +1098,11 @@
 
 static __always_inline u64 drdtime(void)
 {
-	int rID = 0;
 	u64 val = 0;
 
 	__asm__ __volatile__(
-		"rdtime.d %0, %1 \n\t"
-		: "=r"(val), "=r"(rID)
+		"rdtime.d %0, $zero\n\t"
+		: "=r"(val)
 		:
 		);
 	return val;
diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c
index 92270f14db94..f623feb2129f 100644
--- a/arch/loongarch/kernel/stacktrace.c
+++ b/arch/loongarch/kernel/stacktrace.c
@@ -32,7 +32,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 	}
 
 	for (unwind_start(&state, task, regs);
-	     !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
+	     !unwind_done(&state); unwind_next_frame(&state)) {
 		addr = unwind_get_return_address(&state);
 		if (!addr || !consume_entry(cookie, addr))
 			break;
diff --git a/arch/loongarch/kernel/unwind.c b/arch/loongarch/kernel/unwind.c
index ba324ba76fa1..a463d6961344 100644
--- a/arch/loongarch/kernel/unwind.c
+++ b/arch/loongarch/kernel/unwind.c
@@ -28,6 +28,5 @@ bool default_next_frame(struct unwind_state *state)
 
 	} while (!get_stack_info(state->sp, state->task, info));
 
-	state->error = true;
 	return false;
 }
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 55afc27320e1..929ae240280a 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -227,7 +227,7 @@ static bool next_frame(struct unwind_state *state)
 	} while (!get_stack_info(state->sp, state->task, info));
 
 out:
-	state->error = true;
+	state->stack_info.type = STACK_TYPE_UNKNOWN;
 	return false;
 }
 
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 169ff8b3915e..4fcd6cd6da23 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -480,10 +480,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 		case 8:
 			move_reg(ctx, t1, src);
 			emit_insn(ctx, extwb, dst, t1);
+			emit_zext_32(ctx, dst, is32);
 			break;
 		case 16:
 			move_reg(ctx, t1, src);
 			emit_insn(ctx, extwh, dst, t1);
+			emit_zext_32(ctx, dst, is32);
 			break;
 		case 32:
 			emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO);
@@ -772,8 +774,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 			break;
 		case 32:
 			emit_insn(ctx, revb2w, dst, dst);
-			/* zero-extend 32 bits into 64 bits */
-			emit_zext_32(ctx, dst, is32);
+			/* clear the upper 32 bits */
+			emit_zext_32(ctx, dst, true);
 			break;
 		case 64:
 			emit_insn(ctx, revbd, dst, dst);
@@ -911,8 +913,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 
 	/* function return */
 	case BPF_JMP | BPF_EXIT:
-		emit_sext_32(ctx, regmap[BPF_REG_0], true);
-
 		if (i == ctx->prog->len - 1)
 			break;
 
@@ -988,14 +988,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 			}
 			break;
 		case BPF_DW:
-			if (is_signed_imm12(off)) {
-				emit_insn(ctx, ldd, dst, src, off);
-			} else if (is_signed_imm14(off)) {
-				emit_insn(ctx, ldptrd, dst, src, off);
-			} else {
-				move_imm(ctx, t1, off, is32);
-				emit_insn(ctx, ldxd, dst, src, t1);
-			}
+			move_imm(ctx, t1, off, is32);
+			emit_insn(ctx, ldxd, dst, src, t1);
 			break;
 		}
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 76db82542519..797ae590ebdb 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -460,6 +460,7 @@ config MACH_LOONGSON2EF
 
 config MACH_LOONGSON64
 	bool "Loongson 64-bit family of machines"
+	select ARCH_DMA_DEFAULT_COHERENT
 	select ARCH_SPARSEMEM_ENABLE
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
@@ -1251,6 +1252,7 @@ config CPU_LOONGSON64
 	select CPU_SUPPORTS_MSA
 	select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
 	select CPU_MIPSR2_IRQ_VI
+	select DMA_NONCOHERENT
 	select WEAK_ORDERING
 	select WEAK_REORDERING_BEYOND_LLSC
 	select MIPS_ASID_BITS_VARIABLE
diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h
index 035b1a69e2d0..e007edd6b60a 100644
--- a/arch/mips/include/asm/mach-loongson64/boot_param.h
+++ b/arch/mips/include/asm/mach-loongson64/boot_param.h
@@ -14,7 +14,11 @@
 #define ADAPTER_ROM		8
 #define ACPI_TABLE		9
 #define SMBIOS_TABLE		10
-#define MAX_MEMORY_TYPE		11
+#define UMA_VIDEO_RAM		11
+#define VUMA_VIDEO_RAM		12
+#define MAX_MEMORY_TYPE		13
+
+#define MEM_SIZE_IS_IN_BYTES	(1 << 31)
 
 #define LOONGSON3_BOOT_MEM_MAP_MAX 128
 struct efi_memory_map_loongson {
@@ -117,7 +121,8 @@ struct irq_source_routing_table {
 	u64 pci_io_start_addr;
 	u64 pci_io_end_addr;
 	u64 pci_config_addr;
-	u32 dma_mask_bits;
+	u16 dma_mask_bits;
+	u16 dma_noncoherent;
 } __packed;
 
 struct interface_info {
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 5387ed0a5186..b630604c577f 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -121,6 +121,19 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	/*  Put the stack after the struct pt_regs.  */
 	childksp = (unsigned long) childregs;
 	p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK;
+
+	/*
+	 * New tasks lose permission to use the fpu. This accelerates context
+	 * switching for most programs since they don't use the fpu.
+	 */
+	clear_tsk_thread_flag(p, TIF_USEDFPU);
+	clear_tsk_thread_flag(p, TIF_USEDMSA);
+	clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE);
+
+#ifdef CONFIG_MIPS_MT_FPAFF
+	clear_tsk_thread_flag(p, TIF_FPUBOUND);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
 	if (unlikely(args->fn)) {
 		/* kernel thread */
 		unsigned long status = p->thread.cp0_status;
@@ -149,20 +162,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	p->thread.reg29 = (unsigned long) childregs;
 	p->thread.reg31 = (unsigned long) ret_from_fork;
 
-	/*
-	 * New tasks lose permission to use the fpu. This accelerates context
-	 * switching for most programs since they don't use the fpu.
-	 */
 	childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
 
-	clear_tsk_thread_flag(p, TIF_USEDFPU);
-	clear_tsk_thread_flag(p, TIF_USEDMSA);
-	clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE);
-
-#ifdef CONFIG_MIPS_MT_FPAFF
-	clear_tsk_thread_flag(p, TIF_FPUBOUND);
-#endif /* CONFIG_MIPS_MT_FPAFF */
-
 #ifdef CONFIG_MIPS_FP_SUPPORT
 	atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE);
 #endif
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 8fbef537fb88..82e2e051b416 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -351,10 +351,11 @@ early_initcall(mips_smp_ipi_init);
  */
 asmlinkage void start_secondary(void)
 {
-	unsigned int cpu;
+	unsigned int cpu = raw_smp_processor_id();
 
 	cpu_probe();
 	per_cpu_trap_init(false);
+	rcutree_report_cpu_starting(cpu);
 	mips_clockevent_init();
 	mp_ops->init_secondary();
 	cpu_report();
@@ -366,7 +367,6 @@ asmlinkage void start_secondary(void)
 	 */
 
 	calibrate_delay();
-	cpu = smp_processor_id();
 	cpu_data[cpu].udelay_val = loops_per_jiffy;
 
 	set_cpu_sibling_map(cpu);
diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c
index c961e2999f15..ef3750a6ffac 100644
--- a/arch/mips/loongson64/env.c
+++ b/arch/mips/loongson64/env.c
@@ -13,6 +13,8 @@
  * Copyright (C) 2009 Lemote Inc.
  * Author: Wu Zhangjin, wuzhangjin@gmail.com
  */
+
+#include <linux/dma-map-ops.h>
 #include <linux/export.h>
 #include <linux/pci_ids.h>
 #include <asm/bootinfo.h>
@@ -147,8 +149,14 @@ void __init prom_lefi_init_env(void)
 
 	loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits;
 	if (loongson_sysconf.dma_mask_bits < 32 ||
-		loongson_sysconf.dma_mask_bits > 64)
+			loongson_sysconf.dma_mask_bits > 64) {
 		loongson_sysconf.dma_mask_bits = 32;
+		dma_default_coherent = true;
+	} else {
+		dma_default_coherent = !eirq_source->dma_noncoherent;
+	}
+
+	pr_info("Firmware: Coherent DMA: %s\n", dma_default_coherent ? "on" : "off");
 
 	loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm;
 	loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown;
diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c
index ee8de1735b7c..f25caa6aa9d3 100644
--- a/arch/mips/loongson64/init.c
+++ b/arch/mips/loongson64/init.c
@@ -49,8 +49,7 @@ void virtual_early_config(void)
 void __init szmem(unsigned int node)
 {
 	u32 i, mem_type;
-	static unsigned long num_physpages;
-	u64 node_id, node_psize, start_pfn, end_pfn, mem_start, mem_size;
+	phys_addr_t node_id, mem_start, mem_size;
 
 	/* Otherwise come from DTB */
 	if (loongson_sysconf.fw_interface != LOONGSON_LEFI)
@@ -64,30 +63,46 @@ void __init szmem(unsigned int node)
 
 		mem_type = loongson_memmap->map[i].mem_type;
 		mem_size = loongson_memmap->map[i].mem_size;
-		mem_start = loongson_memmap->map[i].mem_start;
+
+		/* Memory size comes in MB if MEM_SIZE_IS_IN_BYTES not set */
+		if (mem_size & MEM_SIZE_IS_IN_BYTES)
+			mem_size &= ~MEM_SIZE_IS_IN_BYTES;
+		else
+			mem_size = mem_size << 20;
+
+		mem_start = (node_id << 44) | loongson_memmap->map[i].mem_start;
 
 		switch (mem_type) {
 		case SYSTEM_RAM_LOW:
 		case SYSTEM_RAM_HIGH:
-			start_pfn = ((node_id << 44) + mem_start) >> PAGE_SHIFT;
-			node_psize = (mem_size << 20) >> PAGE_SHIFT;
-			end_pfn  = start_pfn + node_psize;
-			num_physpages += node_psize;
-			pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n",
-				(u32)node_id, mem_type, mem_start, mem_size);
-			pr_info("       start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
-				start_pfn, end_pfn, num_physpages);
-			memblock_add_node(PFN_PHYS(start_pfn),
-					  PFN_PHYS(node_psize), node,
+		case UMA_VIDEO_RAM:
+			pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes usable\n",
+				(u32)node_id, mem_type, &mem_start, &mem_size);
+			memblock_add_node(mem_start, mem_size, node,
 					  MEMBLOCK_NONE);
 			break;
 		case SYSTEM_RAM_RESERVED:
-			pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n",
-				(u32)node_id, mem_type, mem_start, mem_size);
-			memblock_reserve(((node_id << 44) + mem_start), mem_size << 20);
+		case VIDEO_ROM:
+		case ADAPTER_ROM:
+		case ACPI_TABLE:
+		case SMBIOS_TABLE:
+			pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes reserved\n",
+				(u32)node_id, mem_type, &mem_start, &mem_size);
+			memblock_reserve(mem_start, mem_size);
+			break;
+		/* We should not reserve VUMA_VIDEO_RAM as it overlaps with MMIO */
+		case VUMA_VIDEO_RAM:
+		default:
+			pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes unhandled\n",
+				(u32)node_id, mem_type, &mem_start, &mem_size);
 			break;
 		}
 	}
+
+	/* Reserve vgabios if it comes from firmware */
+	if (loongson_sysconf.vgabios_addr)
+		memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr),
+				SZ_256K);
 }
 
 #ifndef CONFIG_NUMA
diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h
index 1641ff9a8b83..833555f74ffa 100644
--- a/arch/parisc/include/asm/bug.h
+++ b/arch/parisc/include/asm/bug.h
@@ -71,7 +71,7 @@
 		asm volatile("\n"					\
 			     "1:\t" PARISC_BUG_BREAK_ASM "\n"		\
 			     "\t.pushsection __bug_table,\"a\"\n"	\
-			     "\t.align %2\n"				\
+			     "\t.align 4\n"				\
 			     "2:\t" __BUG_REL(1b) "\n"			\
 			     "\t.short %0\n"				\
 			     "\t.blockz %1-4-2\n"			\
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 6a9acfb690c9..2f8f3f93cbb6 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -23,6 +23,15 @@
 #include <asm/feature-fixups.h>
 
 #ifdef CONFIG_VSX
+#define __REST_1FPVSR(n,c,base)						\
+BEGIN_FTR_SECTION							\
+	b	2f;							\
+END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
+	REST_FPR(n,base);						\
+	b	3f;							\
+2:	REST_VSR(n,c,base);						\
+3:
+
 #define __REST_32FPVSRS(n,c,base)					\
 BEGIN_FTR_SECTION							\
 	b	2f;							\
@@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
 2:	SAVE_32VSRS(n,c,base);						\
 3:
 #else
+#define __REST_1FPVSR(n,b,base)		REST_FPR(n, base)
 #define __REST_32FPVSRS(n,b,base)	REST_32FPRS(n, base)
 #define __SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n, base)
 #endif
+#define REST_1FPVSR(n,c,base)   __REST_1FPVSR(n,__REG_##c,__REG_##base)
 #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
 #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
 
@@ -67,6 +78,7 @@ _GLOBAL(store_fp_state)
 	SAVE_32FPVSRS(0, R4, R3)
 	mffs	fr0
 	stfd	fr0,FPSTATE_FPSCR(r3)
+	REST_1FPVSR(0, R4, R3)
 	blr
 EXPORT_SYMBOL(store_fp_state)
 
@@ -138,4 +150,5 @@ _GLOBAL(save_fpu)
 2:	SAVE_32FPVSRS(0, R4, R6)
 	mffs	fr0
 	stfd	fr0,FPSTATE_FPSCR(r6)
+	REST_1FPVSR(0, R4, R6)
 	blr
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 392404688cec..9452a54d356c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1198,11 +1198,11 @@ void kvmppc_save_user_regs(void)
 
 	usermsr = current->thread.regs->msr;
 
+	/* Caller has enabled FP/VEC/VSX/TM in MSR */
 	if (usermsr & MSR_FP)
-		save_fpu(current);
-
+		__giveup_fpu(current);
 	if (usermsr & MSR_VEC)
-		save_altivec(current);
+		__giveup_altivec(current);
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	if (usermsr & MSR_TM) {
diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S
index 90701885762c..40677416d7b2 100644
--- a/arch/powerpc/kernel/trace/ftrace_entry.S
+++ b/arch/powerpc/kernel/trace/ftrace_entry.S
@@ -62,7 +62,7 @@
 	.endif
 
 	/* Save previous stack pointer (r1) */
-	addi	r8, r1, SWITCH_FRAME_SIZE
+	addi	r8, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
 	PPC_STL	r8, GPR1(r1)
 
 	.if \allregs == 1
@@ -182,7 +182,7 @@ ftrace_no_trace:
 	mflr	r3
 	mtctr	r3
 	REST_GPR(3, r1)
-	addi	r1, r1, SWITCH_FRAME_SIZE
+	addi	r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
 	mtlr	r0
 	bctr
 #endif
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 4094e4c4c77a..80b3f6e476b6 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -33,6 +33,7 @@ _GLOBAL(store_vr_state)
 	mfvscr	v0
 	li	r4, VRSTATE_VSCR
 	stvx	v0, r4, r3
+	lvx	v0, 0, r3
 	blr
 EXPORT_SYMBOL(store_vr_state)
 
@@ -109,6 +110,7 @@ _GLOBAL(save_altivec)
 	mfvscr	v0
 	li	r4,VRSTATE_VSCR
 	stvx	v0,r4,r7
+	lvx	v0,0,r7
 	blr
 
 #ifdef CONFIG_VSX
diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
index 90b261114763..dce96f27cc89 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
@@ -8,9 +8,6 @@
 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/leds/common.h>
 
-/* Clock frequency (in Hz) of the rtcclk */
-#define RTCCLK_FREQ		1000000
-
 / {
 	model = "Microchip PolarFire-SoC Icicle Kit";
 	compatible = "microchip,mpfs-icicle-reference-rtlv2210", "microchip,mpfs-icicle-kit",
@@ -29,10 +26,6 @@
 		stdout-path = "serial1:115200n8";
 	};
 
-	cpus {
-		timebase-frequency = <RTCCLK_FREQ>;
-	};
-
 	leds {
 		compatible = "gpio-leds";
 
diff --git a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts
index 184cb36a175e..a8d623ee9fa4 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts
@@ -10,9 +10,6 @@
 #include "mpfs.dtsi"
 #include "mpfs-m100pfs-fabric.dtsi"
 
-/* Clock frequency (in Hz) of the rtcclk */
-#define MTIMER_FREQ	1000000
-
 / {
 	model = "Aries Embedded M100PFEVPS";
 	compatible = "aries,m100pfsevp", "microchip,mpfs";
@@ -33,10 +30,6 @@
 		stdout-path = "serial1:115200n8";
 	};
 
-	cpus {
-		timebase-frequency = <MTIMER_FREQ>;
-	};
-
 	ddrc_cache_lo: memory@80000000 {
 		device_type = "memory";
 		reg = <0x0 0x80000000 0x0 0x40000000>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
index c87cc2d8fe29..ea0808ab1042 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
@@ -6,9 +6,6 @@
 #include "mpfs.dtsi"
 #include "mpfs-polarberry-fabric.dtsi"
 
-/* Clock frequency (in Hz) of the rtcclk */
-#define MTIMER_FREQ	1000000
-
 / {
 	model = "Sundance PolarBerry";
 	compatible = "sundance,polarberry", "microchip,mpfs";
@@ -22,10 +19,6 @@
 		stdout-path = "serial0:115200n8";
 	};
 
-	cpus {
-		timebase-frequency = <MTIMER_FREQ>;
-	};
-
 	ddrc_cache_lo: memory@80000000 {
 		device_type = "memory";
 		reg = <0x0 0x80000000 0x0 0x2e000000>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts
index 013cb666c72d..f9a890579438 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts
@@ -6,9 +6,6 @@
 #include "mpfs.dtsi"
 #include "mpfs-sev-kit-fabric.dtsi"
 
-/* Clock frequency (in Hz) of the rtcclk */
-#define MTIMER_FREQ		1000000
-
 / {
 	#address-cells = <2>;
 	#size-cells = <2>;
@@ -28,10 +25,6 @@
 		stdout-path = "serial1:115200n8";
 	};
 
-	cpus {
-		timebase-frequency = <MTIMER_FREQ>;
-	};
-
 	reserved-memory {
 		#address-cells = <2>;
 		#size-cells = <2>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts
index e0797c7e1b35..d1120f5f2c01 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts
@@ -11,9 +11,6 @@
 #include "mpfs.dtsi"
 #include "mpfs-tysom-m-fabric.dtsi"
 
-/* Clock frequency (in Hz) of the rtcclk */
-#define MTIMER_FREQ		1000000
-
 / {
 	model = "Aldec TySOM-M-MPFS250T-REV2";
 	compatible = "aldec,tysom-m-mpfs250t-rev2", "microchip,mpfs";
@@ -34,10 +31,6 @@
 		stdout-path = "serial1:115200n8";
 	};
 
-	cpus {
-		timebase-frequency = <MTIMER_FREQ>;
-	};
-
 	ddrc_cache_lo: memory@80000000 {
 		device_type = "memory";
 		reg = <0x0 0x80000000 0x0 0x30000000>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi
index a6faf24f1dba..266489d43912 100644
--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi
@@ -13,6 +13,7 @@
 	cpus {
 		#address-cells = <1>;
 		#size-cells = <0>;
+		timebase-frequency = <1000000>;
 
 		cpu0: cpu@0 {
 			compatible = "sifive,e51", "sifive,rocket0", "riscv";
diff --git a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi
index df40e87ee063..aec6401a467b 100644
--- a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi
+++ b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi
@@ -34,7 +34,6 @@
 			cpu0_intc: interrupt-controller {
 				compatible = "riscv,cpu-intc";
 				interrupt-controller;
-				#address-cells = <0>;
 				#interrupt-cells = <1>;
 			};
 		};
diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c
index 197db68cc8da..17a904869724 100644
--- a/arch/riscv/errata/andes/errata.c
+++ b/arch/riscv/errata/andes/errata.c
@@ -38,29 +38,35 @@ static long ax45mp_iocp_sw_workaround(void)
 	return ret.error ? 0 : ret.value;
 }
 
-static bool errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid)
+static void errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid)
 {
+	static bool done;
+
 	if (!IS_ENABLED(CONFIG_ERRATA_ANDES_CMO))
-		return false;
+		return;
+
+	if (done)
+		return;
+
+	done = true;
 
 	if (arch_id != ANDESTECH_AX45MP_MARCHID || impid != ANDESTECH_AX45MP_MIMPID)
-		return false;
+		return;
 
 	if (!ax45mp_iocp_sw_workaround())
-		return false;
+		return;
 
 	/* Set this just to make core cbo code happy */
 	riscv_cbom_block_size = 1;
 	riscv_noncoherent_supported();
-
-	return true;
 }
 
 void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
 					      unsigned long archid, unsigned long impid,
 					      unsigned int stage)
 {
-	errata_probe_iocp(stage, archid, impid);
+	if (stage == RISCV_ALTERNATIVES_BOOT)
+		errata_probe_iocp(stage, archid, impid);
 
 	/* we have nothing to patch here ATM so just return back */
 }
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index b77397432403..76ace1e0b46f 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -154,7 +154,6 @@ secondary_start_sbi:
 	XIP_FIXUP_OFFSET a3
 	add a3, a3, a1
 	REG_L sp, (a3)
-	scs_load_current
 
 .Lsecondary_start_common:
 
@@ -165,6 +164,7 @@ secondary_start_sbi:
 	call relocate_enable_mmu
 #endif
 	call .Lsetup_trap_vector
+	scs_load_current
 	tail smp_callin
 #endif /* CONFIG_SMP */
 
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 56a8c78e9e21..aac019ed63b1 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -40,15 +40,6 @@ struct relocation_handlers {
 				  long buffer);
 };
 
-unsigned int initialize_relocation_hashtable(unsigned int num_relocations);
-void process_accumulated_relocations(struct module *me);
-int add_relocation_to_accumulate(struct module *me, int type, void *location,
-				 unsigned int hashtable_bits, Elf_Addr v);
-
-struct hlist_head *relocation_hashtable;
-
-struct list_head used_buckets_list;
-
 /*
  * The auipc+jalr instruction pair can reach any PC-relative offset
  * in the range [-2^31 - 2^11, 2^31 - 2^11)
@@ -64,7 +55,7 @@ static bool riscv_insn_valid_32bit_offset(ptrdiff_t val)
 
 static int riscv_insn_rmw(void *location, u32 keep, u32 set)
 {
-	u16 *parcel = location;
+	__le16 *parcel = location;
 	u32 insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;
 
 	insn &= keep;
@@ -77,7 +68,7 @@ static int riscv_insn_rmw(void *location, u32 keep, u32 set)
 
 static int riscv_insn_rvc_rmw(void *location, u16 keep, u16 set)
 {
-	u16 *parcel = location;
+	__le16 *parcel = location;
 	u16 insn = le16_to_cpu(*parcel);
 
 	insn &= keep;
@@ -604,7 +595,10 @@ static const struct relocation_handlers reloc_handlers[] = {
 	/* 192-255 nonstandard ABI extensions  */
 };
 
-void process_accumulated_relocations(struct module *me)
+static void
+process_accumulated_relocations(struct module *me,
+				struct hlist_head **relocation_hashtable,
+				struct list_head *used_buckets_list)
 {
 	/*
 	 * Only ADD/SUB/SET/ULEB128 should end up here.
@@ -624,18 +618,25 @@ void process_accumulated_relocations(struct module *me)
 	 *	- Each relocation entry for a location address
 	 */
 	struct used_bucket *bucket_iter;
+	struct used_bucket *bucket_iter_tmp;
 	struct relocation_head *rel_head_iter;
+	struct hlist_node *rel_head_iter_tmp;
 	struct relocation_entry *rel_entry_iter;
+	struct relocation_entry *rel_entry_iter_tmp;
 	int curr_type;
 	void *location;
 	long buffer;
 
-	list_for_each_entry(bucket_iter, &used_buckets_list, head) {
-		hlist_for_each_entry(rel_head_iter, bucket_iter->bucket, node) {
+	list_for_each_entry_safe(bucket_iter, bucket_iter_tmp,
+				 used_buckets_list, head) {
+		hlist_for_each_entry_safe(rel_head_iter, rel_head_iter_tmp,
+					  bucket_iter->bucket, node) {
 			buffer = 0;
 			location = rel_head_iter->location;
-			list_for_each_entry(rel_entry_iter,
-					    rel_head_iter->rel_entry, head) {
+			list_for_each_entry_safe(rel_entry_iter,
+						 rel_entry_iter_tmp,
+						 rel_head_iter->rel_entry,
+						 head) {
 				curr_type = rel_entry_iter->type;
 				reloc_handlers[curr_type].reloc_handler(
 					me, &buffer, rel_entry_iter->value);
@@ -648,11 +649,14 @@ void process_accumulated_relocations(struct module *me)
 		kfree(bucket_iter);
 	}
 
-	kfree(relocation_hashtable);
+	kfree(*relocation_hashtable);
 }
 
-int add_relocation_to_accumulate(struct module *me, int type, void *location,
-				 unsigned int hashtable_bits, Elf_Addr v)
+static int add_relocation_to_accumulate(struct module *me, int type,
+					void *location,
+					unsigned int hashtable_bits, Elf_Addr v,
+					struct hlist_head *relocation_hashtable,
+					struct list_head *used_buckets_list)
 {
 	struct relocation_entry *entry;
 	struct relocation_head *rel_head;
@@ -661,6 +665,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location,
 	unsigned long hash;
 
 	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+
+	if (!entry)
+		return -ENOMEM;
+
 	INIT_LIST_HEAD(&entry->head);
 	entry->type = type;
 	entry->value = v;
@@ -669,7 +677,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location,
 
 	current_head = &relocation_hashtable[hash];
 
-	/* Find matching location (if any) */
+	/*
+	 * Search for the relocation_head for the relocations that happen at the
+	 * provided location
+	 */
 	bool found = false;
 	struct relocation_head *rel_head_iter;
 
@@ -681,19 +692,45 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location,
 		}
 	}
 
+	/*
+	 * If there has not yet been any relocations at the provided location,
+	 * create a relocation_head for that location and populate it with this
+	 * relocation_entry.
+	 */
 	if (!found) {
 		rel_head = kmalloc(sizeof(*rel_head), GFP_KERNEL);
+
+		if (!rel_head) {
+			kfree(entry);
+			return -ENOMEM;
+		}
+
 		rel_head->rel_entry =
 			kmalloc(sizeof(struct list_head), GFP_KERNEL);
+
+		if (!rel_head->rel_entry) {
+			kfree(entry);
+			kfree(rel_head);
+			return -ENOMEM;
+		}
+
 		INIT_LIST_HEAD(rel_head->rel_entry);
 		rel_head->location = location;
 		INIT_HLIST_NODE(&rel_head->node);
 		if (!current_head->first) {
 			bucket =
 				kmalloc(sizeof(struct used_bucket), GFP_KERNEL);
+
+			if (!bucket) {
+				kfree(entry);
+				kfree(rel_head);
+				kfree(rel_head->rel_entry);
+				return -ENOMEM;
+			}
+
 			INIT_LIST_HEAD(&bucket->head);
 			bucket->bucket = current_head;
-			list_add(&bucket->head, &used_buckets_list);
+			list_add(&bucket->head, used_buckets_list);
 		}
 		hlist_add_head(&rel_head->node, current_head);
 	}
@@ -704,7 +741,9 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location,
 	return 0;
 }
 
-unsigned int initialize_relocation_hashtable(unsigned int num_relocations)
+static unsigned int
+initialize_relocation_hashtable(unsigned int num_relocations,
+				struct hlist_head **relocation_hashtable)
 {
 	/* Can safely assume that bits is not greater than sizeof(long) */
 	unsigned long hashtable_size = roundup_pow_of_two(num_relocations);
@@ -720,12 +759,13 @@ unsigned int initialize_relocation_hashtable(unsigned int num_relocations)
 
 	hashtable_size <<= should_double_size;
 
-	relocation_hashtable = kmalloc_array(hashtable_size,
-					     sizeof(*relocation_hashtable),
-					     GFP_KERNEL);
-	__hash_init(relocation_hashtable, hashtable_size);
+	*relocation_hashtable = kmalloc_array(hashtable_size,
+					      sizeof(*relocation_hashtable),
+					      GFP_KERNEL);
+	if (!*relocation_hashtable)
+		return -ENOMEM;
 
-	INIT_LIST_HEAD(&used_buckets_list);
+	__hash_init(*relocation_hashtable, hashtable_size);
 
 	return hashtable_bits;
 }
@@ -742,7 +782,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 	Elf_Addr v;
 	int res;
 	unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel);
-	unsigned int hashtable_bits = initialize_relocation_hashtable(num_relocations);
+	struct hlist_head *relocation_hashtable;
+	struct list_head used_buckets_list;
+	unsigned int hashtable_bits;
+
+	hashtable_bits = initialize_relocation_hashtable(num_relocations,
+							 &relocation_hashtable);
+
+	if (hashtable_bits < 0)
+		return hashtable_bits;
+
+	INIT_LIST_HEAD(&used_buckets_list);
 
 	pr_debug("Applying relocate section %u to %u\n", relsec,
 	       sechdrs[relsec].sh_info);
@@ -823,14 +873,18 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 		}
 
 		if (reloc_handlers[type].accumulate_handler)
-			res = add_relocation_to_accumulate(me, type, location, hashtable_bits, v);
+			res = add_relocation_to_accumulate(me, type, location,
+							   hashtable_bits, v,
+							   relocation_hashtable,
+							   &used_buckets_list);
 		else
 			res = handler(me, location, v);
 		if (res)
 			return res;
 	}
 
-	process_accumulated_relocations(me);
+	process_accumulated_relocations(me, &relocation_hashtable,
+					&used_buckets_list);
 
 	return 0;
 }
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index c712037dbe10..a2ca5b7756a5 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -169,7 +169,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
 	pair->value &= ~missing;
 }
 
-static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
+static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext)
 {
 	struct riscv_hwprobe pair;
 
diff --git a/arch/riscv/kernel/tests/module_test/test_uleb128.S b/arch/riscv/kernel/tests/module_test/test_uleb128.S
index 90f22049d553..8515ed7cd8c1 100644
--- a/arch/riscv/kernel/tests/module_test/test_uleb128.S
+++ b/arch/riscv/kernel/tests/module_test/test_uleb128.S
@@ -6,13 +6,13 @@
 .text
 .global test_uleb_basic
 test_uleb_basic:
-	ld	a0, second
+	lw	a0, second
 	addi	a0, a0, -127
 	ret
 
 .global test_uleb_large
 test_uleb_large:
-	ld	a0, fourth
+	lw	a0, fourth
 	addi	a0, a0, -0x07e8
 	ret
 
@@ -22,10 +22,10 @@ first:
 second:
 	.reloc second, R_RISCV_SET_ULEB128, second
 	.reloc second, R_RISCV_SUB_ULEB128, first
-	.dword 0
+	.word 0
 third:
 	.space 1000
 fourth:
 	.reloc fourth, R_RISCV_SET_ULEB128, fourth
 	.reloc fourth, R_RISCV_SUB_ULEB128, third
-	.dword 0
+	.word 0
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 5eba37147caa..5255f8134aef 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -550,16 +550,14 @@ int handle_misaligned_store(struct pt_regs *regs)
 	} else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) {
 		len = 8;
 		val.data_ulong = GET_RS2S(insn, regs);
-	} else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP &&
-		   ((insn >> SH_RD) & 0x1f)) {
+	} else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP) {
 		len = 8;
 		val.data_ulong = GET_RS2C(insn, regs);
 #endif
 	} else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) {
 		len = 4;
 		val.data_ulong = GET_RS2S(insn, regs);
-	} else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP &&
-		   ((insn >> SH_RD) & 0x1f)) {
+	} else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP) {
 		len = 4;
 		val.data_ulong = GET_RS2C(insn, regs);
 	} else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) {
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 02dcbe82a8e5..8207a892bbe2 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -587,10 +587,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
 
 	if (!gmap_is_shadow(gmap))
 		return;
-	if (start >= 1UL << 31)
-		/* We are only interested in prefix pages */
-		return;
-
 	/*
 	 * Only new shadow blocks are added to the list during runtime,
 	 * therefore we can safely reference them all the time.
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 3bd2ab2a9a34..5cb92941540b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -756,7 +756,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
 		pte_clear(mm, addr, ptep);
 	}
 	if (reset)
-		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
+		pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
 	pgste_set_unlock(ptep, pgste);
 	preempt_enable();
 }
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 1b5d17a9f70d..cf1f13c82175 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -10,6 +10,7 @@
 #include <asm/coco.h>
 #include <asm/tdx.h>
 #include <asm/vmx.h>
+#include <asm/ia32.h>
 #include <asm/insn.h>
 #include <asm/insn-eval.h>
 #include <asm/pgtable.h>
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index d813160b14d8..6356060caaf3 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -26,6 +26,7 @@
 #include <xen/events.h>
 #endif
 
+#include <asm/apic.h>
 #include <asm/desc.h>
 #include <asm/traps.h>
 #include <asm/vdso.h>
@@ -167,7 +168,96 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
 	}
 }
 
-/* Handles int $0x80 */
+#ifdef CONFIG_IA32_EMULATION
+static __always_inline bool int80_is_external(void)
+{
+	const unsigned int offs = (0x80 / 32) * 0x10;
+	const u32 bit = BIT(0x80 % 32);
+
+	/* The local APIC on XENPV guests is fake */
+	if (cpu_feature_enabled(X86_FEATURE_XENPV))
+		return false;
+
+	/*
+	 * If vector 0x80 is set in the APIC ISR then this is an external
+	 * interrupt. Either from broken hardware or injected by a VMM.
+	 *
+	 * Note: In guest mode this is only valid for secure guests where
+	 * the secure module fully controls the vAPIC exposed to the guest.
+	 */
+	return apic_read(APIC_ISR + offs) & bit;
+}
+
+/**
+ * int80_emulation - 32-bit legacy syscall entry
+ *
+ * This entry point can be used by 32-bit and 64-bit programs to perform
+ * 32-bit system calls.  Instances of INT $0x80 can be found inline in
+ * various programs and libraries.  It is also used by the vDSO's
+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
+ * entry method.  Restarted 32-bit system calls also fall back to INT
+ * $0x80 regardless of what instruction was originally used to do the
+ * system call.
+ *
+ * This is considered a slow path.  It is not used by most libc
+ * implementations on modern hardware except during process startup.
+ *
+ * The arguments for the INT $0x80 based syscall are on stack in the
+ * pt_regs structure:
+ *   eax:				system call number
+ *   ebx, ecx, edx, esi, edi, ebp:	arg1 - arg 6
+ */
+DEFINE_IDTENTRY_RAW(int80_emulation)
+{
+	int nr;
+
+	/* Kernel does not use INT $0x80! */
+	if (unlikely(!user_mode(regs))) {
+		irqentry_enter(regs);
+		instrumentation_begin();
+		panic("Unexpected external interrupt 0x80\n");
+	}
+
+	/*
+	 * Establish kernel context for instrumentation, including for
+	 * int80_is_external() below which calls into the APIC driver.
+	 * Identical for soft and external interrupts.
+	 */
+	enter_from_user_mode(regs);
+
+	instrumentation_begin();
+	add_random_kstack_offset();
+
+	/* Validate that this is a soft interrupt to the extent possible */
+	if (unlikely(int80_is_external()))
+		panic("Unexpected external interrupt 0x80\n");
+
+	/*
+	 * The low level idtentry code pushed -1 into regs::orig_ax
+	 * and regs::ax contains the syscall number.
+	 *
+	 * User tracing code (ptrace or signal handlers) might assume
+	 * that the regs::orig_ax contains a 32-bit number on invoking
+	 * a 32-bit syscall.
+	 *
+	 * Establish the syscall convention by saving the 32bit truncated
+	 * syscall number in regs::orig_ax and by invalidating regs::ax.
+	 */
+	regs->orig_ax = regs->ax & GENMASK(31, 0);
+	regs->ax = -ENOSYS;
+
+	nr = syscall_32_enter(regs);
+
+	local_irq_enable();
+	nr = syscall_enter_from_user_mode_work(regs, nr);
+	do_syscall_32_irqs_on(regs, nr);
+
+	instrumentation_end();
+	syscall_exit_to_user_mode(regs);
+}
+#else /* CONFIG_IA32_EMULATION */
+
+/* Handles int $0x80 on a 32bit kernel */
 __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 {
 	int nr = syscall_32_enter(regs);
@@ -186,6 +276,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 	instrumentation_end();
 	syscall_exit_to_user_mode(regs);
 }
+#endif /* !CONFIG_IA32_EMULATION */
 
 static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 {
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 27c05d08558a..de94e2e84ecc 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -275,80 +275,3 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
 	ANNOTATE_NOENDBR
 	int3
 SYM_CODE_END(entry_SYSCALL_compat)
-
-/*
- * 32-bit legacy system call entry.
- *
- * 32-bit x86 Linux system calls traditionally used the INT $0x80
- * instruction.  INT $0x80 lands here.
- *
- * This entry point can be used by 32-bit and 64-bit programs to perform
- * 32-bit system calls.  Instances of INT $0x80 can be found inline in
- * various programs and libraries.  It is also used by the vDSO's
- * __kernel_vsyscall fallback for hardware that doesn't support a faster
- * entry method.  Restarted 32-bit system calls also fall back to INT
- * $0x80 regardless of what instruction was originally used to do the
- * system call.
- *
- * This is considered a slow path.  It is not used by most libc
- * implementations on modern hardware except during process startup.
- *
- * Arguments:
- * eax  system call number
- * ebx  arg1
- * ecx  arg2
- * edx  arg3
- * esi  arg4
- * edi  arg5
- * ebp  arg6
- */
-SYM_CODE_START(entry_INT80_compat)
-	UNWIND_HINT_ENTRY
-	ENDBR
-	/*
-	 * Interrupts are off on entry.
-	 */
-	ASM_CLAC			/* Do this early to minimize exposure */
-	ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
-
-	/*
-	 * User tracing code (ptrace or signal handlers) might assume that
-	 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
-	 * syscall.  Just in case the high bits are nonzero, zero-extend
-	 * the syscall number.  (This could almost certainly be deleted
-	 * with no ill effects.)
-	 */
-	movl	%eax, %eax
-
-	/* switch to thread stack expects orig_ax and rdi to be pushed */
-	pushq	%rax			/* pt_regs->orig_ax */
-
-	/* Need to switch before accessing the thread stack. */
-	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
-
-	/* In the Xen PV case we already run on the thread stack. */
-	ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
-
-	movq	%rsp, %rax
-	movq	PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
-
-	pushq	5*8(%rax)		/* regs->ss */
-	pushq	4*8(%rax)		/* regs->rsp */
-	pushq	3*8(%rax)		/* regs->eflags */
-	pushq	2*8(%rax)		/* regs->cs */
-	pushq	1*8(%rax)		/* regs->ip */
-	pushq	0*8(%rax)		/* regs->orig_ax */
-.Lint80_keep_stack:
-
-	PUSH_AND_CLEAR_REGS rax=$-ENOSYS
-	UNWIND_HINT_REGS
-
-	cld
-
-	IBRS_ENTER
-	UNTRAIN_RET
-
-	movq	%rsp, %rdi
-	call	do_int80_syscall_32
-	jmp	swapgs_restore_regs_and_return_to_usermode
-SYM_CODE_END(entry_INT80_compat)
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 5a2ae24b1204..9805629479d9 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -75,6 +75,11 @@ static inline bool ia32_enabled(void)
 	return __ia32_enabled;
 }
 
+static inline void ia32_disable(void)
+{
+	__ia32_enabled = false;
+}
+
 #else /* !CONFIG_IA32_EMULATION */
 
 static inline bool ia32_enabled(void)
@@ -82,6 +87,8 @@ static inline bool ia32_enabled(void)
 	return IS_ENABLED(CONFIG_X86_32);
 }
 
+static inline void ia32_disable(void) {}
+
 #endif
 
 #endif /* _ASM_X86_IA32_H */
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 05fd175cec7d..13639e57e1f8 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -569,6 +569,10 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_UD,		exc_invalid_op);
 DECLARE_IDTENTRY_RAW(X86_TRAP_BP,		exc_int3);
 DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF,	exc_page_fault);
 
+#if defined(CONFIG_IA32_EMULATION)
+DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR,	int80_emulation);
+#endif
+
 #ifdef CONFIG_X86_MCE
 #ifdef CONFIG_X86_64
 DECLARE_IDTENTRY_MCE(X86_TRAP_MC,	exc_machine_check);
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 4d84122bd643..484f4f0131a5 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -32,10 +32,6 @@ void entry_SYSCALL_compat(void);
 void entry_SYSCALL_compat_safe_stack(void);
 void entry_SYSRETL_compat_unsafe_stack(void);
 void entry_SYSRETL_compat_end(void);
-void entry_INT80_compat(void);
-#ifdef CONFIG_XEN_PV
-void xen_entry_INT80_compat(void);
-#endif
 #else /* !CONFIG_IA32_EMULATION */
 #define entry_SYSCALL_compat NULL
 #define entry_SYSENTER_compat NULL
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a7eab05e5f29..f322ebd053a9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1320,6 +1320,9 @@ static void zenbleed_check_cpu(void *unused)
 
 void amd_check_microcode(void)
 {
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		return;
+
 	on_each_cpu(zenbleed_check_cpu, NULL, 1);
 }
 
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 8857abc706e4..660b601f1d6c 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -121,7 +121,7 @@ static const __initconst struct idt_data def_idts[] = {
 
 static const struct idt_data ia32_idt[] __initconst = {
 #if defined(CONFIG_IA32_EMULATION)
-	SYSG(IA32_SYSCALL_VECTOR,	entry_INT80_compat),
+	SYSG(IA32_SYSCALL_VECTOR,	asm_int80_emulation),
 #elif defined(CONFIG_X86_32)
 	SYSG(IA32_SYSCALL_VECTOR,	entry_INT80_32),
 #endif
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 70472eebe719..c67285824e82 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -1234,10 +1234,6 @@ void setup_ghcb(void)
 	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
 		return;
 
-	/* First make sure the hypervisor talks a supported protocol. */
-	if (!sev_es_negotiate_protocol())
-		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
-
 	/*
 	 * Check whether the runtime #VC exception handler is active. It uses
 	 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
@@ -1255,6 +1251,13 @@ void setup_ghcb(void)
 	}
 
 	/*
+	 * Make sure the hypervisor talks a supported protocol.
+	 * This gets called only in the BSP boot phase.
+	 */
+	if (!sev_es_negotiate_protocol())
+		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+
+	/*
 	 * Clear the boot_ghcb. The first exception comes in before the bss
 	 * section is cleared.
 	 */
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index ee8c4c3496ed..eea6ea7f14af 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -182,6 +182,7 @@ static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations mmu_rmaps_stat_fops = {
+	.owner		= THIS_MODULE,
 	.open		= kvm_mmu_rmaps_stat_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 712146312358..f3bb30b40876 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1855,15 +1855,17 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	bool old_paging = is_paging(vcpu);
 
 #ifdef CONFIG_X86_64
-	if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
+	if (vcpu->arch.efer & EFER_LME) {
 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
 			vcpu->arch.efer |= EFER_LMA;
-			svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
+			if (!vcpu->arch.guest_state_protected)
+				svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
 		}
 
 		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
 			vcpu->arch.efer &= ~EFER_LMA;
-			svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
+			if (!vcpu->arch.guest_state_protected)
+				svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
 		}
 	}
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2c924075f6f1..1a3aaa7dafae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5518,8 +5518,8 @@ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 					 struct kvm_xsave *guest_xsave)
 {
-	return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
-					     sizeof(guest_xsave->region));
+	kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
+				      sizeof(guest_xsave->region));
 }
 
 static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
@@ -13031,7 +13031,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.guest_state_protected)
 		return true;
 
-	return vcpu->arch.preempted_in_kernel;
+	if (vcpu != kvm_get_running_vcpu())
+		return vcpu->arch.preempted_in_kernel;
+
+	return static_call(kvm_x86_get_cpl)(vcpu) == 0;
 }
 
 unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index a68f2dda0948..70b91de2e053 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -32,6 +32,7 @@
 #include <asm/msr.h>
 #include <asm/cmdline.h>
 #include <asm/sev.h>
+#include <asm/ia32.h>
 
 #include "mm_internal.h"
 
@@ -481,6 +482,16 @@ void __init sme_early_init(void)
 	 */
 	if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
 		x86_cpuinit.parallel_bringup = false;
+
+	/*
+	 * The VMM is capable of injecting interrupt 0x80 and triggering the
+	 * compatibility syscall path.
+	 *
+	 * By default, the 32-bit emulation is disabled in order to ensure
+	 * the safety of the VM.
+	 */
+	if (sev_status & MSR_AMD64_SEV_ENABLED)
+		ia32_disable();
 }
 
 void __init mem_encrypt_free_decrypted_mem(void)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8c10d9abc239..e89e415aa743 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3025,3 +3025,49 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp
 #endif
 	WARN(1, "verification of programs using bpf_throw should have failed\n");
 }
+
+void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+			       struct bpf_prog *new, struct bpf_prog *old)
+{
+	u8 *old_addr, *new_addr, *old_bypass_addr;
+	int ret;
+
+	old_bypass_addr = old ? NULL : poke->bypass_addr;
+	old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
+	new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
+
+	/*
+	 * On program loading or teardown, the program's kallsym entry
+	 * might not be in place, so we use __bpf_arch_text_poke to skip
+	 * the kallsyms check.
+	 */
+	if (new) {
+		ret = __bpf_arch_text_poke(poke->tailcall_target,
+					   BPF_MOD_JUMP,
+					   old_addr, new_addr);
+		BUG_ON(ret < 0);
+		if (!old) {
+			ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+						   BPF_MOD_JUMP,
+						   poke->bypass_addr,
+						   NULL);
+			BUG_ON(ret < 0);
+		}
+	} else {
+		ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+					   BPF_MOD_JUMP,
+					   old_bypass_addr,
+					   poke->bypass_addr);
+		BUG_ON(ret < 0);
+		/* let other CPUs finish the execution of program
+		 * so that it will not possible to expose them
+		 * to invalid nop, stack unwind, nop state
+		 */
+		if (!ret)
+			synchronize_rcu();
+		ret = __bpf_arch_text_poke(poke->tailcall_target,
+					   BPF_MOD_JUMP,
+					   old_addr, NULL);
+		BUG_ON(ret < 0);
+	}
+}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0337392a3121..3c61bb98c10e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -33,9 +33,12 @@ EXPORT_SYMBOL_GPL(hypercall_page);
  * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info
  * but during boot it is switched to point to xen_vcpu_info.
  * The pointer is used in xen_evtchn_do_upcall to acknowledge pending events.
+ * Make sure that xen_vcpu_info doesn't cross a page boundary by making it
+ * cache-line aligned (the struct is guaranteed to have a size of 64 bytes,
+ * which matches the cache line size of 64-bit x86 processors).
  */
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
-DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
+DEFINE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info);
 
 /* Linux <-> Xen vCPU id mapping */
 DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
@@ -160,6 +163,7 @@ void xen_vcpu_setup(int cpu)
 	int err;
 	struct vcpu_info *vcpup;
 
+	BUILD_BUG_ON(sizeof(*vcpup) > SMP_CACHE_BYTES);
 	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
 
 	/*
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index bbbfdd495ebd..aeb33e0a3f76 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -704,7 +704,7 @@ static struct trap_array_entry trap_array[] = {
 	TRAP_ENTRY(exc_int3,				false ),
 	TRAP_ENTRY(exc_overflow,			false ),
 #ifdef CONFIG_IA32_EMULATION
-	{ entry_INT80_compat,          xen_entry_INT80_compat,          false },
+	TRAP_ENTRY(int80_emulation,			false ),
 #endif
 	TRAP_ENTRY(exc_page_fault,			false ),
 	TRAP_ENTRY(exc_divide_error,			false ),
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 9e5e68008785..1a9cd18dfbd3 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -156,7 +156,7 @@ xen_pv_trap asm_xenpv_exc_machine_check
 #endif /* CONFIG_X86_MCE */
 xen_pv_trap asm_exc_simd_coprocessor_error
 #ifdef CONFIG_IA32_EMULATION
-xen_pv_trap entry_INT80_compat
+xen_pv_trap asm_int80_emulation
 #endif
 xen_pv_trap asm_exc_xen_unknown_trap
 xen_pv_trap asm_exc_xen_hypervisor_callback
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 408a2aa66c69..a87ab36889e7 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -21,7 +21,7 @@ extern void *xen_initial_gdt;
 struct trap_info;
 void xen_copy_trap_info(struct trap_info *traps);
 
-DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info);
+DECLARE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info);
 DECLARE_PER_CPU(unsigned long, xen_cr3);
 DECLARE_PER_CPU(unsigned long, xen_current_cr3);
 
diff --git a/block/blk-core.c b/block/blk-core.c
index fdf25b8d6e78..2eca76ccf4ee 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -501,9 +501,17 @@ static inline void bio_check_ro(struct bio *bio)
 	if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
 		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
 			return;
-		pr_warn_ratelimited("Trying to write to read-only block-device %pg\n",
-				    bio->bi_bdev);
-		/* Older lvm-tools actually trigger this */
+
+		if (bio->bi_bdev->bd_ro_warned)
+			return;
+
+		bio->bi_bdev->bd_ro_warned = true;
+		/*
+		 * Use ioctl to set underlying disk of raid/dm to read-only
+		 * will trigger this.
+		 */
+		pr_warn("Trying to write to read-only block-device %pg\n",
+			bio->bi_bdev);
 	}
 }
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 900c1be1fee1..ac18f802c027 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1512,14 +1512,26 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
 
+static bool blk_is_flush_data_rq(struct request *rq)
+{
+	return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq);
+}
+
 static bool blk_mq_rq_inflight(struct request *rq, void *priv)
 {
 	/*
 	 * If we find a request that isn't idle we know the queue is busy
 	 * as it's checked in the iter.
 	 * Return false to stop the iteration.
+	 *
+	 * In case of queue quiesce, if one flush data request is completed,
+	 * don't count it as inflight given the flush sequence is suspended,
+	 * and the original flush data request is invisible to driver, just
+	 * like other pending requests because of quiesce
 	 */
-	if (blk_mq_request_started(rq)) {
+	if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) &&
+				blk_is_flush_data_rq(rq) &&
+				blk_mq_request_completed(rq))) {
 		bool *busy = priv;
 
 		*busy = true;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 63e481262336..0b2d04766324 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -615,6 +615,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
 #endif
 
+/* Common attributes for bio-based and request-based queues. */
 static struct attribute *queue_attrs[] = {
 	&queue_ra_entry.attr,
 	&queue_max_hw_sectors_entry.attr,
@@ -659,6 +660,7 @@ static struct attribute *queue_attrs[] = {
 	NULL,
 };
 
+/* Request-based queue attributes that are not relevant for bio-based queues. */
 static struct attribute *blk_mq_queue_attrs[] = {
 	&queue_requests_entry.attr,
 	&elv_iosched_entry.attr,
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 9711e8fc979d..a365791a9f5c 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -853,6 +853,9 @@ static int device_early_init(struct hl_device *hdev)
 		gaudi2_set_asic_funcs(hdev);
 		strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name));
 		break;
+	case ASIC_GAUDI2C:
+		gaudi2_set_asic_funcs(hdev);
+		strscpy(hdev->asic_name, "GAUDI2C", sizeof(hdev->asic_name));
 		break;
 	default:
 		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
@@ -1041,18 +1044,21 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
 	return (vendor_id == PCI_VENDOR_ID_HABANALABS);
 }
 
-static void hl_device_eq_heartbeat(struct hl_device *hdev)
+static int hl_device_eq_heartbeat_check(struct hl_device *hdev)
 {
-	u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 
 	if (!prop->cpucp_info.eq_health_check_supported)
-		return;
+		return 0;
 
-	if (hdev->eq_heartbeat_received)
+	if (hdev->eq_heartbeat_received) {
 		hdev->eq_heartbeat_received = false;
-	else
-		hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
+	} else {
+		dev_err(hdev->dev, "EQ heartbeat event was not received!\n");
+		return -EIO;
+	}
+
+	return 0;
 }
 
 static void hl_device_heartbeat(struct work_struct *work)
@@ -1069,10 +1075,9 @@ static void hl_device_heartbeat(struct work_struct *work)
 	/*
 	 * For EQ health check need to check if driver received the heartbeat eq event
 	 * in order to validate the eq is working.
+	 * Only if both the EQ is healthy and we managed to send the next heartbeat reschedule.
 	 */
-	hl_device_eq_heartbeat(hdev);
-
-	if (!hdev->asic_funcs->send_heartbeat(hdev))
+	if ((!hl_device_eq_heartbeat_check(hdev)) && (!hdev->asic_funcs->send_heartbeat(hdev)))
 		goto reschedule;
 
 	if (hl_device_operational(hdev, NULL))
@@ -2035,7 +2040,7 @@ device_reset:
 	if (ctx)
 		hl_ctx_put(ctx);
 
-	return hl_device_reset(hdev, flags);
+	return hl_device_reset(hdev, flags | HL_DRV_RESET_HARD);
 }
 
 static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask)
diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index 47e8384134aa..3558a6a8e192 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -646,39 +646,27 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
 	return rc;
 }
 
-static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
-								u32 sts_val)
+static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, u32 sts_val)
 {
 	bool err_exists = false;
 
 	if (!(err_val & CPU_BOOT_ERR0_ENABLED))
 		return false;
 
-	if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) {
-		dev_err(hdev->dev,
-			"Device boot error - DRAM initialization failed\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
+		dev_err(hdev->dev, "Device boot error - DRAM initialization failed\n");
 
-	if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) {
+	if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
 		dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) {
-		dev_err(hdev->dev,
-			"Device boot error - Thermal Sensor initialization failed\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
+		dev_err(hdev->dev, "Device boot error - Thermal Sensor initialization failed\n");
 
 	if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
 		if (hdev->bmc_enable) {
-			dev_err(hdev->dev,
-				"Device boot error - Skipped waiting for BMC\n");
-			err_exists = true;
+			dev_err(hdev->dev, "Device boot error - Skipped waiting for BMC\n");
 		} else {
-			dev_info(hdev->dev,
-				"Device boot message - Skipped waiting for BMC\n");
+			dev_info(hdev->dev, "Device boot message - Skipped waiting for BMC\n");
 			/* This is an info so we don't want it to disable the
 			 * device
 			 */
@@ -686,48 +674,29 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
 		}
 	}
 
-	if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) {
-		dev_err(hdev->dev,
-			"Device boot error - Serdes data from BMC not available\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
+		dev_err(hdev->dev, "Device boot error - Serdes data from BMC not available\n");
 
-	if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) {
-		dev_err(hdev->dev,
-			"Device boot error - NIC F/W initialization failed\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
+		dev_err(hdev->dev, "Device boot error - NIC F/W initialization failed\n");
 
-	if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) {
-		dev_err(hdev->dev,
-			"Device boot warning - security not ready\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
+		dev_err(hdev->dev, "Device boot warning - security not ready\n");
 
-	if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
 		dev_err(hdev->dev, "Device boot error - security failure\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
 		dev_err(hdev->dev, "Device boot error - eFuse failure\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL)
 		dev_err(hdev->dev, "Device boot error - Failed to load preboot secondary image\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_PLL_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
 		dev_err(hdev->dev, "Device boot error - PLL failure\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL)
 		dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n");
-		err_exists = true;
-	}
 
 	if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
 		/* Ignore this bit, don't prevent driver loading */
@@ -735,52 +704,32 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
 		err_val &= ~CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL;
 	}
 
-	if (err_val & CPU_BOOT_ERR0_BINNING_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_BINNING_FAIL)
 		dev_err(hdev->dev, "Device boot error - binning failure\n");
-		err_exists = true;
-	}
 
 	if (sts_val & CPU_BOOT_DEV_STS0_ENABLED)
 		dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val);
 
+	if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
+		dev_err(hdev->dev, "Device boot warning - Skipped DRAM initialization\n");
+
+	if (err_val & CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL)
+		dev_err(hdev->dev, "Device boot error - ARC memory scrub failed\n");
+
+	/* All warnings should go here in order not to reach the unknown error validation */
 	if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) {
 		dev_err(hdev->dev, "Device boot error - EEPROM failure detected\n");
 		err_exists = true;
 	}
 
-	/* All warnings should go here in order not to reach the unknown error validation */
-	if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
-		dev_warn(hdev->dev,
-			"Device boot warning - Skipped DRAM initialization\n");
-		/* This is a warning so we don't want it to disable the
-		 * device
-		 */
-		err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED;
-	}
+	if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL)
+		dev_warn(hdev->dev, "Device boot warning - Failed to load preboot primary image\n");
 
-	if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL) {
-		dev_warn(hdev->dev,
-			"Device boot warning - Failed to load preboot primary image\n");
-		/* This is a warning so we don't want it to disable the
-		 * device as we have a secondary preboot image
-		 */
-		err_val &= ~CPU_BOOT_ERR0_PRI_IMG_VER_FAIL;
-	}
-
-	if (err_val & CPU_BOOT_ERR0_TPM_FAIL) {
-		dev_warn(hdev->dev,
-			"Device boot warning - TPM failure\n");
-		/* This is a warning so we don't want it to disable the
-		 * device
-		 */
-		err_val &= ~CPU_BOOT_ERR0_TPM_FAIL;
-	}
+	if (err_val & CPU_BOOT_ERR0_TPM_FAIL)
+		dev_warn(hdev->dev, "Device boot warning - TPM failure\n");
 
-	if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) {
-		dev_err(hdev->dev,
-			"Device boot error - unknown ERR0 error 0x%08x\n", err_val);
+	if (err_val & CPU_BOOT_ERR_FATAL_MASK)
 		err_exists = true;
-	}
 
 	/* return error only if it's in the predefined mask */
 	if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) &
@@ -3295,6 +3244,14 @@ int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_in
 					HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
 }
 
+int hl_fw_get_dev_info_signed(struct hl_device *hdev,
+			      struct cpucp_dev_info_signed *dev_info_signed, u32 nonce)
+{
+	return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_INFO_SIGNED_GET, dev_info_signed,
+					 sizeof(struct cpucp_dev_info_signed), nonce,
+					 HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
+}
+
 int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type sub_opcode,
 						dma_addr_t buff, u32 *size)
 {
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 1655c101c705..2a900c9941fe 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -1262,6 +1262,7 @@ struct hl_dec {
  * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000).
  * @ASIC_GAUDI2: Gaudi2 device.
  * @ASIC_GAUDI2B: Gaudi2B device.
+ * @ASIC_GAUDI2C: Gaudi2C device.
  */
 enum hl_asic_type {
 	ASIC_INVALID,
@@ -1270,6 +1271,7 @@ enum hl_asic_type {
 	ASIC_GAUDI_SEC,
 	ASIC_GAUDI2,
 	ASIC_GAUDI2B,
+	ASIC_GAUDI2C,
 };
 
 struct hl_cs_parser;
@@ -3519,6 +3521,9 @@ struct hl_device {
 	u8				heartbeat;
 };
 
+/* Retrieve PCI device name in case of a PCI device or dev name in simulator */
+#define HL_DEV_NAME(hdev)	\
+		((hdev)->pdev ? dev_name(&(hdev)->pdev->dev) : "NA-DEVICE")
 
 /**
  * struct hl_cs_encaps_sig_handle - encapsulated signals handle structure
@@ -3594,6 +3599,14 @@ static inline bool hl_is_fw_sw_ver_below(struct hl_device *hdev, u32 fw_sw_major
 	return false;
 }
 
+static inline bool hl_is_fw_sw_ver_equal_or_greater(struct hl_device *hdev, u32 fw_sw_major,
+							u32 fw_sw_minor)
+{
+	return (hdev->fw_sw_major_ver > fw_sw_major ||
+			(hdev->fw_sw_major_ver == fw_sw_major &&
+					hdev->fw_sw_minor_ver >= fw_sw_minor));
+}
+
 /*
  * Kernel module functions that can be accessed by entire module
  */
@@ -3954,6 +3967,8 @@ long hl_fw_get_max_power(struct hl_device *hdev);
 void hl_fw_set_max_power(struct hl_device *hdev);
 int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
 				u32 nonce);
+int hl_fw_get_dev_info_signed(struct hl_device *hdev,
+			      struct cpucp_dev_info_signed *dev_info_signed, u32 nonce);
 int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value);
 int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value);
 int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value);
diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c
index 306a5bc9bf89..e542fd40e16c 100644
--- a/drivers/accel/habanalabs/common/habanalabs_drv.c
+++ b/drivers/accel/habanalabs/common/habanalabs_drv.c
@@ -141,6 +141,9 @@ static enum hl_asic_type get_asic_type(struct hl_device *hdev)
 		case REV_ID_B:
 			asic_type = ASIC_GAUDI2B;
 			break;
+		case REV_ID_C:
+			asic_type = ASIC_GAUDI2C;
+			break;
 		default:
 			break;
 		}
@@ -670,6 +673,38 @@ static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
+static void hl_pci_reset_prepare(struct pci_dev *pdev)
+{
+	struct hl_device *hdev;
+
+	hdev = pci_get_drvdata(pdev);
+	if (!hdev)
+		return;
+
+	hdev->disabled = true;
+}
+
+static void hl_pci_reset_done(struct pci_dev *pdev)
+{
+	struct hl_device *hdev;
+	u32 flags;
+
+	hdev = pci_get_drvdata(pdev);
+	if (!hdev)
+		return;
+
+	/*
+	 * Schedule a thread to trigger hard reset.
+	 * The reason for this handler, is for rare cases where the driver is up
+	 * and FLR occurs. This is valid only when working with no VM, so FW handles FLR
+	 * and resets the device. FW will go back preboot stage, so driver needs to perform
+	 * hard reset in order to load FW fit again.
+	 */
+	flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW;
+
+	hl_device_reset(hdev, flags);
+}
+
 static const struct dev_pm_ops hl_pm_ops = {
 	.suspend = hl_pmops_suspend,
 	.resume = hl_pmops_resume,
@@ -679,6 +714,8 @@ static const struct pci_error_handlers hl_pci_err_handler = {
 	.error_detected = hl_pci_err_detected,
 	.slot_reset = hl_pci_err_slot_reset,
 	.resume = hl_pci_err_resume,
+	.reset_prepare = hl_pci_reset_prepare,
+	.reset_done = hl_pci_reset_done,
 };
 
 static struct pci_driver hl_pci_driver = {
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 8ef36effb95b..1dd6e23172ca 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -19,6 +19,9 @@
 
 #include <asm/msr.h>
 
+/* make sure there is space for all the signed info */
+static_assert(sizeof(struct cpucp_info) <= SEC_DEV_INFO_BUF_SZ);
+
 static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
 	[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
 	[HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
@@ -685,7 +688,7 @@ static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 	if (!sec_attest_info)
 		return -ENOMEM;
 
-	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
 		rc = -ENOMEM;
 		goto free_sec_attest_info;
@@ -719,6 +722,53 @@ free_sec_attest_info:
 	return rc;
 }
 
+static int dev_info_signed(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	struct cpucp_dev_info_signed *dev_info_signed;
+	struct hl_info_signed *info;
+	u32 max_size = args->return_size;
+	int rc;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	dev_info_signed = kzalloc(sizeof(*dev_info_signed), GFP_KERNEL);
+	if (!dev_info_signed)
+		return -ENOMEM;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		rc = -ENOMEM;
+		goto free_dev_info_signed;
+	}
+
+	rc = hl_fw_get_dev_info_signed(hpriv->hdev,
+					dev_info_signed, args->sec_attest_nonce);
+	if (rc)
+		goto free_info;
+
+	info->nonce = le32_to_cpu(dev_info_signed->nonce);
+	info->info_sig_len = dev_info_signed->info_sig_len;
+	info->pub_data_len = le16_to_cpu(dev_info_signed->pub_data_len);
+	info->certificate_len = le16_to_cpu(dev_info_signed->certificate_len);
+	info->dev_info_len = sizeof(struct cpucp_info);
+	memcpy(&info->info_sig, &dev_info_signed->info_sig, sizeof(info->info_sig));
+	memcpy(&info->public_data, &dev_info_signed->public_data, sizeof(info->public_data));
+	memcpy(&info->certificate, &dev_info_signed->certificate, sizeof(info->certificate));
+	memcpy(&info->dev_info, &dev_info_signed->info, info->dev_info_len);
+
+	rc = copy_to_user(out, info, min_t(size_t, max_size, sizeof(*info))) ? -EFAULT : 0;
+
+free_info:
+	kfree(info);
+free_dev_info_signed:
+	kfree(dev_info_signed);
+
+	return rc;
+}
+
+
 static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
 {
 	int rc;
@@ -1089,6 +1139,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_FW_GENERIC_REQ:
 		return send_fw_generic_request(hdev, args);
 
+	case HL_INFO_DEV_SIGNED:
+		return dev_info_signed(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -EINVAL;
diff --git a/drivers/accel/habanalabs/common/hwmon.c b/drivers/accel/habanalabs/common/hwmon.c
index 8598056216e7..1ee2ee07e9ed 100644
--- a/drivers/accel/habanalabs/common/hwmon.c
+++ b/drivers/accel/habanalabs/common/hwmon.c
@@ -578,10 +578,6 @@ int hl_get_temperature(struct hl_device *hdev,
 				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.sensor_index = __cpu_to_le16(sensor_index);
 	pkt.type = __cpu_to_le16(attr);
-
-	dev_dbg(hdev->dev, "get temp, ctl 0x%x, sensor %d, type %d\n",
-		pkt.ctl, pkt.sensor_index, pkt.type);
-
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
 						0, &result);
 
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 0b8689fe0b64..3348ad12c237 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -955,8 +955,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 				(i + 1) == phys_pg_pack->npages);
 		if (rc) {
 			dev_err(hdev->dev,
-				"map failed for handle %u, npages: %llu, mapped: %llu",
-				phys_pg_pack->handle, phys_pg_pack->npages,
+				"map failed (%d) for handle %u, npages: %llu, mapped: %llu\n",
+				rc, phys_pg_pack->handle, phys_pg_pack->npages,
 				mapped_pg_cnt);
 			goto err;
 		}
@@ -1186,7 +1186,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device
 
 	rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
 	if (rc) {
-		dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
+		dev_err(hdev->dev, "mapping page pack failed (%d) for handle %u\n",
+			rc, handle);
 		mutex_unlock(&hdev->mmu_lock);
 		goto map_err;
 	}
diff --git a/drivers/accel/habanalabs/common/mmu/mmu.c b/drivers/accel/habanalabs/common/mmu/mmu.c
index b2145716c605..b654302a68fc 100644
--- a/drivers/accel/habanalabs/common/mmu/mmu.c
+++ b/drivers/accel/habanalabs/common/mmu/mmu.c
@@ -596,6 +596,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
 		break;
 	case ASIC_GAUDI2:
 	case ASIC_GAUDI2B:
+	case ASIC_GAUDI2C:
 		/* MMUs in Gaudi2 are always host resident */
 		hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
 		break;
diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c
index 01f89f029355..8a9f98832157 100644
--- a/drivers/accel/habanalabs/common/sysfs.c
+++ b/drivers/accel/habanalabs/common/sysfs.c
@@ -8,6 +8,7 @@
 #include "habanalabs.h"
 
 #include <linux/pci.h>
+#include <linux/types.h>
 
 static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
@@ -80,12 +81,27 @@ static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, c
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	struct cpucp_info *cpucp_info;
+	u32 infineon_second_stage_version;
+	u32 infineon_second_stage_first_instance;
+	u32 infineon_second_stage_second_instance;
+	u32 infineon_second_stage_third_instance;
+	u32 mask = 0xff;
 
 	cpucp_info = &hdev->asic_prop.cpucp_info;
 
+	infineon_second_stage_version = le32_to_cpu(cpucp_info->infineon_second_stage_version);
+	infineon_second_stage_first_instance = infineon_second_stage_version & mask;
+	infineon_second_stage_second_instance =
+					(infineon_second_stage_version >> 8) & mask;
+	infineon_second_stage_third_instance =
+					(infineon_second_stage_version >> 16) & mask;
+
 	if (cpucp_info->infineon_second_stage_version)
-		return sprintf(buf, "%#04x %#04x\n", le32_to_cpu(cpucp_info->infineon_version),
-				le32_to_cpu(cpucp_info->infineon_second_stage_version));
+		return sprintf(buf, "%#04x %#04x:%#04x:%#04x\n",
+				le32_to_cpu(cpucp_info->infineon_version),
+				infineon_second_stage_first_instance,
+				infineon_second_stage_second_instance,
+				infineon_second_stage_third_instance);
 	else
 		return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
 }
@@ -251,6 +267,9 @@ static ssize_t device_type_show(struct device *dev,
 	case ASIC_GAUDI2B:
 		str = "GAUDI2B";
 		break;
+	case ASIC_GAUDI2C:
+		str = "GAUDI2C";
+		break;
 	default:
 		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
 				hdev->asic_type);
@@ -383,6 +402,21 @@ static ssize_t security_enabled_show(struct device *dev,
 	return sprintf(buf, "%d\n", hdev->asic_prop.fw_security_enabled);
 }
 
+static ssize_t module_id_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n", le32_to_cpu(hdev->asic_prop.cpucp_info.card_location));
+}
+
+static ssize_t parent_device_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", HL_DEV_NAME(hdev));
+}
+
 static DEVICE_ATTR_RO(armcp_kernel_ver);
 static DEVICE_ATTR_RO(armcp_ver);
 static DEVICE_ATTR_RO(cpld_ver);
@@ -402,6 +436,8 @@ static DEVICE_ATTR_RO(thermal_ver);
 static DEVICE_ATTR_RO(uboot_ver);
 static DEVICE_ATTR_RO(fw_os_ver);
 static DEVICE_ATTR_RO(security_enabled);
+static DEVICE_ATTR_RO(module_id);
+static DEVICE_ATTR_RO(parent_device);
 
 static struct bin_attribute bin_attr_eeprom = {
 	.attr = {.name = "eeprom", .mode = (0444)},
@@ -427,6 +463,8 @@ static struct attribute *hl_dev_attrs[] = {
 	&dev_attr_uboot_ver.attr,
 	&dev_attr_fw_os_ver.attr,
 	&dev_attr_security_enabled.attr,
+	&dev_attr_module_id.attr,
+	&dev_attr_parent_device.attr,
 	NULL,
 };
 
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 819660c684cf..e0e5615ef9b0 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7858,39 +7858,44 @@ static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 	return !!ecc_data->is_critical;
 }
 
-static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask)
+static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id)
 {
-	u32 lo, hi, cq_ptr_size, arc_cq_ptr_size;
-	u64 cq_ptr, arc_cq_ptr, cp_current_inst;
+	struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
+	u64 cq_ptr, cp_current_inst;
+	u32 lo, hi, cq_size, cp_sts;
+	bool is_arc_cq;
 
-	lo = RREG32(qman_base + QM_CQ_PTR_LO_4_OFFSET);
-	hi = RREG32(qman_base + QM_CQ_PTR_HI_4_OFFSET);
-	cq_ptr = ((u64) hi) << 32 | lo;
-	cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_4_OFFSET);
+	cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET);
+	is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */
 
-	lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_OFFSET);
-	hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_OFFSET);
-	arc_cq_ptr = ((u64) hi) << 32 | lo;
-	arc_cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_OFFSET);
+	if (is_arc_cq) {
+		lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET);
+		hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET);
+		cq_ptr = ((u64) hi) << 32 | lo;
+		cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
+	} else {
+		lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET);
+		hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET);
+		cq_ptr = ((u64) hi) << 32 | lo;
+		cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
+	}
 
 	lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
 	hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET);
 	cp_current_inst = ((u64) hi) << 32 | lo;
 
 	dev_info(hdev->dev,
-		"LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
-		cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst);
+		"LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n",
+		is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst);
 
-	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
-		if (arc_cq_ptr) {
-			hdev->captured_err_info.undef_opcode.cq_addr = arc_cq_ptr;
-			hdev->captured_err_info.undef_opcode.cq_size = arc_cq_ptr_size;
-		} else {
-			hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
-			hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size;
-		}
-
-		hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS;
+	if (undef_opcode->write_enable) {
+		memset(undef_opcode, 0, sizeof(*undef_opcode));
+		undef_opcode->timestamp = ktime_get();
+		undef_opcode->cq_addr = cq_ptr;
+		undef_opcode->cq_size = cq_size;
+		undef_opcode->engine_id = engine_id;
+		undef_opcode->stream_id = QMAN_STREAMS;
+		undef_opcode->write_enable = 0;
 	}
 }
 
@@ -7929,21 +7934,12 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
 				error_count++;
 			}
 
-		if (i == QMAN_STREAMS && error_count) {
-			/* check for undefined opcode */
-			if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK &&
-					hdev->captured_err_info.undef_opcode.write_enable) {
-				memset(&hdev->captured_err_info.undef_opcode, 0,
-						sizeof(hdev->captured_err_info.undef_opcode));
-
-				hdev->captured_err_info.undef_opcode.write_enable = false;
-				hdev->captured_err_info.undef_opcode.timestamp = ktime_get();
-				hdev->captured_err_info.undef_opcode.engine_id =
-							gaudi2_queue_id_to_engine_id[qid_base];
-				*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
-			}
-
-			handle_lower_qman_data_on_err(hdev, qman_base, *event_mask);
+		/* Check for undefined opcode error in lower QM */
+		if ((i == QMAN_STREAMS) &&
+				(glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) {
+			handle_lower_qman_data_on_err(hdev, qman_base,
+							gaudi2_queue_id_to_engine_id[qid_base]);
+			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
 		}
 	}
 
@@ -10007,6 +10003,8 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
+		if (hl_is_fw_sw_ver_equal_or_greater(hdev, 1, 13))
+			is_critical = true;
 		break;
 
 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
diff --git a/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h b/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
index a08378d0802b..d21fcd3880b4 100644
--- a/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
+++ b/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
@@ -242,14 +242,15 @@
 #define QM_FENCE2_OFFSET		(mmPDMA0_QM_CP_FENCE2_RDATA_0 - mmPDMA0_QM_BASE)
 #define QM_SEI_STATUS_OFFSET		(mmPDMA0_QM_SEI_STATUS - mmPDMA0_QM_BASE)
 
-#define QM_CQ_PTR_LO_4_OFFSET		(mmPDMA0_QM_CQ_PTR_LO_4 - mmPDMA0_QM_BASE)
-#define QM_CQ_PTR_HI_4_OFFSET		(mmPDMA0_QM_CQ_PTR_HI_4 - mmPDMA0_QM_BASE)
-#define QM_CQ_TSIZE_4_OFFSET		(mmPDMA0_QM_CQ_TSIZE_4 - mmPDMA0_QM_BASE)
+#define QM_CQ_TSIZE_STS_4_OFFSET	(mmPDMA0_QM_CQ_TSIZE_STS_4 - mmPDMA0_QM_BASE)
+#define QM_CQ_PTR_LO_STS_4_OFFSET	(mmPDMA0_QM_CQ_PTR_LO_STS_4 - mmPDMA0_QM_BASE)
+#define QM_CQ_PTR_HI_STS_4_OFFSET	(mmPDMA0_QM_CQ_PTR_HI_STS_4 - mmPDMA0_QM_BASE)
 
-#define QM_ARC_CQ_PTR_LO_OFFSET		(mmPDMA0_QM_ARC_CQ_PTR_LO - mmPDMA0_QM_BASE)
-#define QM_ARC_CQ_PTR_HI_OFFSET		(mmPDMA0_QM_ARC_CQ_PTR_HI - mmPDMA0_QM_BASE)
-#define QM_ARC_CQ_TSIZE_OFFSET		(mmPDMA0_QM_ARC_CQ_TSIZE - mmPDMA0_QM_BASE)
+#define QM_ARC_CQ_TSIZE_STS_OFFSET	(mmPDMA0_QM_ARC_CQ_TSIZE_STS - mmPDMA0_QM_BASE)
+#define QM_ARC_CQ_PTR_LO_STS_OFFSET	(mmPDMA0_QM_ARC_CQ_PTR_LO_STS - mmPDMA0_QM_BASE)
+#define QM_ARC_CQ_PTR_HI_STS_OFFSET	(mmPDMA0_QM_ARC_CQ_PTR_HI_STS - mmPDMA0_QM_BASE)
 
+#define QM_CP_STS_4_OFFSET		(mmPDMA0_QM_CP_STS_4 - mmPDMA0_QM_BASE)
 #define QM_CP_CURRENT_INST_LO_4_OFFSET	(mmPDMA0_QM_CP_CURRENT_INST_LO_4 - mmPDMA0_QM_BASE)
 #define QM_CP_CURRENT_INST_HI_4_OFFSET	(mmPDMA0_QM_CP_CURRENT_INST_HI_4 - mmPDMA0_QM_BASE)
 
diff --git a/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h b/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h
index f5d497dc9bdc..4f951cada077 100644
--- a/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h
+++ b/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h
@@ -25,6 +25,7 @@ enum hl_revision_id {
 	REV_ID_INVALID				= 0x00,
 	REV_ID_A				= 0x01,
 	REV_ID_B				= 0x02,
+	REV_ID_C				= 0x03
 };
 
 #endif /* INCLUDE_PCI_GENERAL_H_ */
diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index d321ca7160d9..6cee536c229a 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -253,8 +253,7 @@ static const struct backlight_ops acpi_backlight_ops = {
 static int video_get_max_state(struct thermal_cooling_device *cooling_dev,
 			       unsigned long *state)
 {
-	struct acpi_device *device = cooling_dev->devdata;
-	struct acpi_video_device *video = acpi_driver_data(device);
+	struct acpi_video_device *video = cooling_dev->devdata;
 
 	*state = video->brightness->count - ACPI_VIDEO_FIRST_LEVEL - 1;
 	return 0;
@@ -263,8 +262,7 @@ static int video_get_max_state(struct thermal_cooling_device *cooling_dev,
 static int video_get_cur_state(struct thermal_cooling_device *cooling_dev,
 			       unsigned long *state)
 {
-	struct acpi_device *device = cooling_dev->devdata;
-	struct acpi_video_device *video = acpi_driver_data(device);
+	struct acpi_video_device *video = cooling_dev->devdata;
 	unsigned long long level;
 	int offset;
 
@@ -283,8 +281,7 @@ static int video_get_cur_state(struct thermal_cooling_device *cooling_dev,
 static int
 video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long state)
 {
-	struct acpi_device *device = cooling_dev->devdata;
-	struct acpi_video_device *video = acpi_driver_data(device);
+	struct acpi_video_device *video = cooling_dev->devdata;
 	int level;
 
 	if (state >= video->brightness->count - ACPI_VIDEO_FIRST_LEVEL)
@@ -1125,7 +1122,6 @@ static int acpi_video_bus_get_one_device(struct acpi_device *device, void *arg)
 
 	strcpy(acpi_device_name(device), ACPI_VIDEO_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_VIDEO_CLASS);
-	device->driver_data = data;
 
 	data->device_id = device_id;
 	data->video = video;
@@ -1747,8 +1743,8 @@ static void acpi_video_dev_register_backlight(struct acpi_video_device *device)
 	device->backlight->props.brightness =
 			acpi_video_get_brightness(device->backlight);
 
-	device->cooling_dev = thermal_cooling_device_register("LCD",
-				device->dev, &video_cooling_ops);
+	device->cooling_dev = thermal_cooling_device_register("LCD", device,
+							      &video_cooling_ops);
 	if (IS_ERR(device->cooling_dev)) {
 		/*
 		 * Set cooling_dev to NULL so we don't crash trying to free it.
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index fa5dd71a80fa..02bb2cce423f 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1568,17 +1568,22 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
 	int err;
 	const struct iommu_ops *ops;
 
+	/* Serialise to make dev->iommu stable under our potential fwspec */
+	mutex_lock(&iommu_probe_device_lock);
 	/*
 	 * If we already translated the fwspec there is nothing left to do,
 	 * return the iommu_ops.
 	 */
 	ops = acpi_iommu_fwspec_ops(dev);
-	if (ops)
+	if (ops) {
+		mutex_unlock(&iommu_probe_device_lock);
 		return ops;
+	}
 
 	err = iort_iommu_configure_id(dev, id_in);
 	if (err && err != -EPROBE_DEFER)
 		err = viot_iommu_configure(dev);
+	mutex_unlock(&iommu_probe_device_lock);
 
 	/*
 	 * If we have reason to believe the IOMMU driver missed the initial
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 28c75242fca9..62944e35fcee 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -399,13 +399,13 @@ acpi_evaluate_reference(acpi_handle handle,
 		acpi_handle_debug(list->handles[i], "Found in reference list\n");
 	}
 
-end:
 	if (ACPI_FAILURE(status)) {
 		list->count = 0;
 		kfree(list->handles);
 		list->handles = NULL;
 	}
 
+end:
 	kfree(buffer.pointer);
 
 	return status;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index c10ff8985203..0a0f483124c3 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1055,9 +1055,14 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
 		 * Ask the sd driver to issue START STOP UNIT on runtime suspend
 		 * and resume and shutdown only. For system level suspend/resume,
 		 * devices power state is handled directly by libata EH.
+		 * Given that disks are always spun up on system resume, also
+		 * make sure that the sd driver forces runtime suspended disks
+		 * to be resumed to correctly reflect the power state of the
+		 * device.
 		 */
-		sdev->manage_runtime_start_stop = true;
-		sdev->manage_shutdown = true;
+		sdev->manage_runtime_start_stop = 1;
+		sdev->manage_shutdown = 1;
+		sdev->force_runtime_start_on_system_start = 1;
 	}
 
 	/*
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 9ea22e165acd..548491de818e 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -144,7 +144,7 @@ static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store);
 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
 #endif /* CONFIG_HOTPLUG_CPU */
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 #include <linux/kexec.h>
 
 static ssize_t crash_notes_show(struct device *dev,
@@ -189,14 +189,14 @@ static const struct attribute_group crash_note_cpu_attr_group = {
 #endif
 
 static const struct attribute_group *common_cpu_attr_groups[] = {
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 	&crash_note_cpu_attr_group,
 #endif
 	NULL
 };
 
 static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 	&crash_note_cpu_attr_group,
 #endif
 	NULL
diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
index 91536ee05f14..7e2d1f0d903a 100644
--- a/drivers/base/devcoredump.c
+++ b/drivers/base/devcoredump.c
@@ -362,6 +362,7 @@ void dev_coredumpm(struct device *dev, struct module *owner,
 	devcd->devcd_dev.class = &devcd_class;
 
 	mutex_lock(&devcd->mutex);
+	dev_set_uevent_suppress(&devcd->devcd_dev, true);
 	if (device_add(&devcd->devcd_dev))
 		goto put_device;
 
@@ -376,6 +377,8 @@ void dev_coredumpm(struct device *dev, struct module *owner,
 		              "devcoredump"))
 		dev_warn(dev, "devcoredump create_link failed\n");
 
+	dev_set_uevent_suppress(&devcd->devcd_dev, false);
+	kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
 	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
 	schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
 	mutex_unlock(&devcd->mutex);
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index f3b9a4d0fa3b..8a13babd826c 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -180,6 +180,9 @@ static inline unsigned long memblk_nr_poison(struct memory_block *mem)
 }
 #endif
 
+/*
+ * Must acquire mem_hotplug_lock in write mode.
+ */
 static int memory_block_online(struct memory_block *mem)
 {
 	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
@@ -204,10 +207,11 @@ static int memory_block_online(struct memory_block *mem)
 	if (mem->altmap)
 		nr_vmemmap_pages = mem->altmap->free;
 
+	mem_hotplug_begin();
 	if (nr_vmemmap_pages) {
 		ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
 		if (ret)
-			return ret;
+			goto out;
 	}
 
 	ret = online_pages(start_pfn + nr_vmemmap_pages,
@@ -215,7 +219,7 @@ static int memory_block_online(struct memory_block *mem)
 	if (ret) {
 		if (nr_vmemmap_pages)
 			mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
-		return ret;
+		goto out;
 	}
 
 	/*
@@ -227,9 +231,14 @@ static int memory_block_online(struct memory_block *mem)
 					  nr_vmemmap_pages);
 
 	mem->zone = zone;
+out:
+	mem_hotplug_done();
 	return ret;
 }
 
+/*
+ * Must acquire mem_hotplug_lock in write mode.
+ */
 static int memory_block_offline(struct memory_block *mem)
 {
 	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
@@ -247,6 +256,7 @@ static int memory_block_offline(struct memory_block *mem)
 	if (mem->altmap)
 		nr_vmemmap_pages = mem->altmap->free;
 
+	mem_hotplug_begin();
 	if (nr_vmemmap_pages)
 		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
 					  -nr_vmemmap_pages);
@@ -258,13 +268,15 @@ static int memory_block_offline(struct memory_block *mem)
 		if (nr_vmemmap_pages)
 			adjust_present_page_count(pfn_to_page(start_pfn),
 						  mem->group, nr_vmemmap_pages);
-		return ret;
+		goto out;
 	}
 
 	if (nr_vmemmap_pages)
 		mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
 
 	mem->zone = NULL;
+out:
+	mem_hotplug_done();
 	return ret;
 }
 
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 92592f944a3d..ac63a73ccdaa 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -410,8 +410,7 @@ out:
 			rb_entry(node, struct regmap_range_node, node);
 
 		/* If there's nothing in the cache there's nothing to sync */
-		ret = regcache_read(map, this->selector_reg, &i);
-		if (ret != 0)
+		if (regcache_read(map, this->selector_reg, &i) != 0)
 			continue;
 
 		ret = _regmap_write(map, this->selector_reg, i);
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 9a1e194d5cf8..1f6186475715 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -307,11 +307,11 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
 		highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
 
 	WRITE_ONCE(cpudata->highest_perf, highest_perf);
-
+	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
 	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
 	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
 	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
-
+	WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
 	return 0;
 }
 
@@ -329,11 +329,12 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
 		highest_perf = cppc_perf.highest_perf;
 
 	WRITE_ONCE(cpudata->highest_perf, highest_perf);
-
+	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
 	WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
 	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
 		   cppc_perf.lowest_nonlinear_perf);
 	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
+	WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);
 
 	if (cppc_state == AMD_PSTATE_ACTIVE)
 		return 0;
@@ -432,6 +433,10 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
 	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
 	u64 value = prev;
 
+	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
+			cpudata->max_limit_perf);
+	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
+			cpudata->max_limit_perf);
 	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
 
 	if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
@@ -470,6 +475,22 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy)
 	return 0;
 }
 
+static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
+{
+	u32 max_limit_perf, min_limit_perf;
+	struct amd_cpudata *cpudata = policy->driver_data;
+
+	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
+	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
+
+	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
+	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
+	WRITE_ONCE(cpudata->max_limit_freq, policy->max);
+	WRITE_ONCE(cpudata->min_limit_freq, policy->min);
+
+	return 0;
+}
+
 static int amd_pstate_update_freq(struct cpufreq_policy *policy,
 				  unsigned int target_freq, bool fast_switch)
 {
@@ -480,6 +501,9 @@ static int amd_pstate_update_freq(struct cpufreq_policy *policy,
 	if (!cpudata->max_freq)
 		return -ENODEV;
 
+	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
+		amd_pstate_update_min_max_limit(policy);
+
 	cap_perf = READ_ONCE(cpudata->highest_perf);
 	min_perf = READ_ONCE(cpudata->lowest_perf);
 	max_perf = cap_perf;
@@ -518,7 +542,9 @@ static int amd_pstate_target(struct cpufreq_policy *policy,
 static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
 				  unsigned int target_freq)
 {
-	return amd_pstate_update_freq(policy, target_freq, true);
+	if (!amd_pstate_update_freq(policy, target_freq, true))
+		return target_freq;
+	return policy->cur;
 }
 
 static void amd_pstate_adjust_perf(unsigned int cpu,
@@ -532,6 +558,10 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
 	struct amd_cpudata *cpudata = policy->driver_data;
 	unsigned int target_freq;
 
+	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
+		amd_pstate_update_min_max_limit(policy);
+
+
 	cap_perf = READ_ONCE(cpudata->highest_perf);
 	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
 	max_freq = READ_ONCE(cpudata->max_freq);
@@ -745,6 +775,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
 	/* Initial processor data capability frequencies */
 	cpudata->max_freq = max_freq;
 	cpudata->min_freq = min_freq;
+	cpudata->max_limit_freq = max_freq;
+	cpudata->min_limit_freq = min_freq;
 	cpudata->nominal_freq = nominal_freq;
 	cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
 
@@ -850,11 +882,16 @@ static ssize_t show_energy_performance_available_preferences(
 {
 	int i = 0;
 	int offset = 0;
+	struct amd_cpudata *cpudata = policy->driver_data;
+
+	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+		return sysfs_emit_at(buf, offset, "%s\n",
+				energy_perf_strings[EPP_INDEX_PERFORMANCE]);
 
 	while (energy_perf_strings[i] != NULL)
 		offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);
 
-	sysfs_emit_at(buf, offset, "\n");
+	offset += sysfs_emit_at(buf, offset, "\n");
 
 	return offset;
 }
@@ -1183,16 +1220,25 @@ static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static void amd_pstate_epp_init(unsigned int cpu)
+static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
 {
-	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 	struct amd_cpudata *cpudata = policy->driver_data;
-	u32 max_perf, min_perf;
+	u32 max_perf, min_perf, min_limit_perf, max_limit_perf;
 	u64 value;
 	s16 epp;
 
 	max_perf = READ_ONCE(cpudata->highest_perf);
 	min_perf = READ_ONCE(cpudata->lowest_perf);
+	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
+	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
+
+	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
+			cpudata->max_limit_perf);
+	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
+			cpudata->max_limit_perf);
+
+	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
+	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
 
 	value = READ_ONCE(cpudata->cppc_req_cached);
 
@@ -1210,9 +1256,6 @@ static void amd_pstate_epp_init(unsigned int cpu)
 	value &= ~AMD_CPPC_DES_PERF(~0L);
 	value |= AMD_CPPC_DES_PERF(0);
 
-	if (cpudata->epp_policy == cpudata->policy)
-		goto skip_epp;
-
 	cpudata->epp_policy = cpudata->policy;
 
 	/* Get BIOS pre-defined epp value */
@@ -1222,7 +1265,7 @@ static void amd_pstate_epp_init(unsigned int cpu)
 		 * This return value can only be negative for shared_memory
 		 * systems where EPP register read/write not supported.
 		 */
-		goto skip_epp;
+		return;
 	}
 
 	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
@@ -1236,8 +1279,6 @@ static void amd_pstate_epp_init(unsigned int cpu)
 
 	WRITE_ONCE(cpudata->cppc_req_cached, value);
 	amd_pstate_set_epp(cpudata, epp);
-skip_epp:
-	cpufreq_cpu_put(policy);
 }
 
 static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
@@ -1252,7 +1293,7 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
 
 	cpudata->policy = policy->policy;
 
-	amd_pstate_epp_init(policy->cpu);
+	amd_pstate_epp_update_limit(policy);
 
 	return 0;
 }
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 494d044b9e72..33728c242f66 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -327,7 +327,7 @@ static int imx6ul_opp_check_speed_grading(struct device *dev)
 			imx6x_disable_freq_in_opp(dev, 696000000);
 
 	if (of_machine_is_compatible("fsl,imx6ull")) {
-		if (val != OCOTP_CFG3_6ULL_SPEED_792MHZ)
+		if (val < OCOTP_CFG3_6ULL_SPEED_792MHZ)
 			imx6x_disable_freq_in_opp(dev, 792000000);
 
 		if (val != OCOTP_CFG3_6ULL_SPEED_900MHZ)
diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c
index 6355a39418c5..ea05d9d67490 100644
--- a/drivers/cpufreq/qcom-cpufreq-nvmem.c
+++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c
@@ -23,8 +23,10 @@
 #include <linux/nvmem-consumer.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/pm.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_opp.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/soc/qcom/smem.h>
 
@@ -55,6 +57,7 @@ struct qcom_cpufreq_match_data {
 
 struct qcom_cpufreq_drv_cpu {
 	int opp_token;
+	struct device **virt_devs;
 };
 
 struct qcom_cpufreq_drv {
@@ -424,6 +427,30 @@ static const struct qcom_cpufreq_match_data match_data_ipq8074 = {
 	.get_version = qcom_cpufreq_ipq8074_name_version,
 };
 
+static void qcom_cpufreq_suspend_virt_devs(struct qcom_cpufreq_drv *drv, unsigned int cpu)
+{
+	const char * const *name = drv->data->genpd_names;
+	int i;
+
+	if (!drv->cpus[cpu].virt_devs)
+		return;
+
+	for (i = 0; *name; i++, name++)
+		device_set_awake_path(drv->cpus[cpu].virt_devs[i]);
+}
+
+static void qcom_cpufreq_put_virt_devs(struct qcom_cpufreq_drv *drv, unsigned int cpu)
+{
+	const char * const *name = drv->data->genpd_names;
+	int i;
+
+	if (!drv->cpus[cpu].virt_devs)
+		return;
+
+	for (i = 0; *name; i++, name++)
+		pm_runtime_put(drv->cpus[cpu].virt_devs[i]);
+}
+
 static int qcom_cpufreq_probe(struct platform_device *pdev)
 {
 	struct qcom_cpufreq_drv *drv;
@@ -478,6 +505,7 @@ static int qcom_cpufreq_probe(struct platform_device *pdev)
 	of_node_put(np);
 
 	for_each_possible_cpu(cpu) {
+		struct device **virt_devs = NULL;
 		struct dev_pm_opp_config config = {
 			.supported_hw = NULL,
 		};
@@ -498,7 +526,7 @@ static int qcom_cpufreq_probe(struct platform_device *pdev)
 
 		if (drv->data->genpd_names) {
 			config.genpd_names = drv->data->genpd_names;
-			config.virt_devs = NULL;
+			config.virt_devs = &virt_devs;
 		}
 
 		if (config.supported_hw || config.genpd_names) {
@@ -509,6 +537,27 @@ static int qcom_cpufreq_probe(struct platform_device *pdev)
 				goto free_opp;
 			}
 		}
+
+		if (virt_devs) {
+			const char * const *name = config.genpd_names;
+			int i, j;
+
+			for (i = 0; *name; i++, name++) {
+				ret = pm_runtime_resume_and_get(virt_devs[i]);
+				if (ret) {
+					dev_err(cpu_dev, "failed to resume %s: %d\n",
+						*name, ret);
+
+					/* Rollback previous PM runtime calls */
+					name = config.genpd_names;
+					for (j = 0; *name && j < i; j++, name++)
+						pm_runtime_put(virt_devs[j]);
+
+					goto free_opp;
+				}
+			}
+			drv->cpus[cpu].virt_devs = virt_devs;
+		}
 	}
 
 	cpufreq_dt_pdev = platform_device_register_simple("cpufreq-dt", -1,
@@ -522,8 +571,10 @@ static int qcom_cpufreq_probe(struct platform_device *pdev)
 	dev_err(cpu_dev, "Failed to register platform device\n");
 
 free_opp:
-	for_each_possible_cpu(cpu)
+	for_each_possible_cpu(cpu) {
+		qcom_cpufreq_put_virt_devs(drv, cpu);
 		dev_pm_opp_clear_config(drv->cpus[cpu].opp_token);
+	}
 	return ret;
 }
 
@@ -534,15 +585,31 @@ static void qcom_cpufreq_remove(struct platform_device *pdev)
 
 	platform_device_unregister(cpufreq_dt_pdev);
 
-	for_each_possible_cpu(cpu)
+	for_each_possible_cpu(cpu) {
+		qcom_cpufreq_put_virt_devs(drv, cpu);
 		dev_pm_opp_clear_config(drv->cpus[cpu].opp_token);
+	}
 }
 
+static int qcom_cpufreq_suspend(struct device *dev)
+{
+	struct qcom_cpufreq_drv *drv = dev_get_drvdata(dev);
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu)
+		qcom_cpufreq_suspend_virt_devs(drv, cpu);
+
+	return 0;
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(qcom_cpufreq_pm_ops, qcom_cpufreq_suspend, NULL);
+
 static struct platform_driver qcom_cpufreq_driver = {
 	.probe = qcom_cpufreq_probe,
 	.remove_new = qcom_cpufreq_remove,
 	.driver = {
 		.name = "qcom-cpufreq-nvmem",
+		.pm = pm_sleep_ptr(&qcom_cpufreq_pm_ops),
 	},
 };
 
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 38b4110378de..eb8b733065b2 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -301,7 +301,7 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
 
 		dma_resv_list_entry(fobj, i, obj, &old, &old_usage);
 		if ((old->context == fence->context && old_usage >= usage &&
-		     dma_fence_is_later(fence, old)) ||
+		     dma_fence_is_later_or_same(fence, old)) ||
 		    dma_fence_is_signaled(old)) {
 			dma_resv_list_set(fobj, i, fence, usage);
 			dma_fence_put(old);
diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index aa597cda0d88..2828e9573e90 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -717,14 +717,11 @@ static void create_units(struct fw_device *device)
 					fw_unit_attributes,
 					&unit->attribute_group);
 
-		if (device_register(&unit->device) < 0)
-			goto skip_unit;
-
 		fw_device_get(device);
-		continue;
-
-	skip_unit:
-		kfree(unit);
+		if (device_register(&unit->device) < 0) {
+			put_device(&unit->device);
+			continue;
+		}
 	}
 }
 
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index 7edf2c95282f..e779d866022b 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -1519,9 +1519,9 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev)
 	sdev->use_10_for_rw = 1;
 
 	if (sbp2_param_exclusive_login) {
-		sdev->manage_system_start_stop = true;
-		sdev->manage_runtime_start_stop = true;
-		sdev->manage_shutdown = true;
+		sdev->manage_system_start_stop = 1;
+		sdev->manage_runtime_start_stop = 1;
+		sdev->manage_shutdown = 1;
 	}
 
 	if (sdev->type == TYPE_ROM)
diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
index 07b72c679247..6146b2927d5c 100644
--- a/drivers/firmware/arm_ffa/driver.c
+++ b/drivers/firmware/arm_ffa/driver.c
@@ -99,6 +99,7 @@ struct ffa_drv_info {
 	void *tx_buffer;
 	bool mem_ops_native;
 	bool bitmap_created;
+	bool notif_enabled;
 	unsigned int sched_recv_irq;
 	unsigned int cpuhp_state;
 	struct ffa_pcpu_irq __percpu *irq_pcpu;
@@ -782,7 +783,7 @@ static void ffa_notification_info_get(void)
 			if (ids_processed >= max_ids - 1)
 				break;
 
-			part_id = packed_id_list[++ids_processed];
+			part_id = packed_id_list[ids_processed++];
 
 			if (!ids_count[list]) { /* Global Notification */
 				__do_sched_recv_cb(part_id, 0, false);
@@ -794,7 +795,7 @@ static void ffa_notification_info_get(void)
 				if (ids_processed >= max_ids - 1)
 					break;
 
-				vcpu_id = packed_id_list[++ids_processed];
+				vcpu_id = packed_id_list[ids_processed++];
 
 				__do_sched_recv_cb(part_id, vcpu_id, true);
 			}
@@ -889,6 +890,8 @@ static int ffa_memory_lend(struct ffa_mem_ops_args *args)
 
 #define FFA_SECURE_PARTITION_ID_FLAG	BIT(15)
 
+#define ffa_notifications_disabled()	(!drv_info->notif_enabled)
+
 enum notify_type {
 	NON_SECURE_VM,
 	SECURE_PARTITION,
@@ -908,6 +911,9 @@ static int ffa_sched_recv_cb_update(u16 part_id, ffa_sched_recv_cb callback,
 	struct ffa_dev_part_info *partition;
 	bool cb_valid;
 
+	if (ffa_notifications_disabled())
+		return -EOPNOTSUPP;
+
 	partition = xa_load(&drv_info->partition_info, part_id);
 	write_lock(&partition->rw_lock);
 
@@ -1001,6 +1007,9 @@ static int ffa_notify_relinquish(struct ffa_device *dev, int notify_id)
 	int rc;
 	enum notify_type type = ffa_notify_type_get(dev->vm_id);
 
+	if (ffa_notifications_disabled())
+		return -EOPNOTSUPP;
+
 	if (notify_id >= FFA_MAX_NOTIFICATIONS)
 		return -EINVAL;
 
@@ -1027,6 +1036,9 @@ static int ffa_notify_request(struct ffa_device *dev, bool is_per_vcpu,
 	u32 flags = 0;
 	enum notify_type type = ffa_notify_type_get(dev->vm_id);
 
+	if (ffa_notifications_disabled())
+		return -EOPNOTSUPP;
+
 	if (notify_id >= FFA_MAX_NOTIFICATIONS)
 		return -EINVAL;
 
@@ -1057,6 +1069,9 @@ static int ffa_notify_send(struct ffa_device *dev, int notify_id,
 {
 	u32 flags = 0;
 
+	if (ffa_notifications_disabled())
+		return -EOPNOTSUPP;
+
 	if (is_per_vcpu)
 		flags |= (PER_VCPU_NOTIFICATION_FLAG | vcpu << 16);
 
@@ -1233,7 +1248,7 @@ static void ffa_partitions_cleanup(void)
 	if (!count)
 		return;
 
-	info = kcalloc(count, sizeof(**info), GFP_KERNEL);
+	info = kcalloc(count, sizeof(*info), GFP_KERNEL);
 	if (!info)
 		return;
 
@@ -1311,8 +1326,10 @@ static int ffa_sched_recv_irq_map(void)
 
 static void ffa_sched_recv_irq_unmap(void)
 {
-	if (drv_info->sched_recv_irq)
+	if (drv_info->sched_recv_irq) {
 		irq_dispose_mapping(drv_info->sched_recv_irq);
+		drv_info->sched_recv_irq = 0;
+	}
 }
 
 static int ffa_cpuhp_pcpu_irq_enable(unsigned int cpu)
@@ -1329,17 +1346,23 @@ static int ffa_cpuhp_pcpu_irq_disable(unsigned int cpu)
 
 static void ffa_uninit_pcpu_irq(void)
 {
-	if (drv_info->cpuhp_state)
+	if (drv_info->cpuhp_state) {
 		cpuhp_remove_state(drv_info->cpuhp_state);
+		drv_info->cpuhp_state = 0;
+	}
 
-	if (drv_info->notif_pcpu_wq)
+	if (drv_info->notif_pcpu_wq) {
 		destroy_workqueue(drv_info->notif_pcpu_wq);
+		drv_info->notif_pcpu_wq = NULL;
+	}
 
 	if (drv_info->sched_recv_irq)
 		free_percpu_irq(drv_info->sched_recv_irq, drv_info->irq_pcpu);
 
-	if (drv_info->irq_pcpu)
+	if (drv_info->irq_pcpu) {
 		free_percpu(drv_info->irq_pcpu);
+		drv_info->irq_pcpu = NULL;
+	}
 }
 
 static int ffa_init_pcpu_irq(unsigned int irq)
@@ -1388,22 +1411,23 @@ static void ffa_notifications_cleanup(void)
 		ffa_notification_bitmap_destroy();
 		drv_info->bitmap_created = false;
 	}
+	drv_info->notif_enabled = false;
 }
 
-static int ffa_notifications_setup(void)
+static void ffa_notifications_setup(void)
 {
 	int ret, irq;
 
 	ret = ffa_features(FFA_NOTIFICATION_BITMAP_CREATE, 0, NULL, NULL);
 	if (ret) {
-		pr_err("Notifications not supported, continuing with it ..\n");
-		return 0;
+		pr_info("Notifications not supported, continuing with it ..\n");
+		return;
 	}
 
 	ret = ffa_notification_bitmap_create();
 	if (ret) {
-		pr_err("notification_bitmap_create error %d\n", ret);
-		return ret;
+		pr_info("Notification bitmap create error %d\n", ret);
+		return;
 	}
 	drv_info->bitmap_created = true;
 
@@ -1422,14 +1446,11 @@ static int ffa_notifications_setup(void)
 	hash_init(drv_info->notifier_hash);
 	mutex_init(&drv_info->notify_lock);
 
-	/* Register internal scheduling callback */
-	ret = ffa_sched_recv_cb_update(drv_info->vm_id, ffa_self_notif_handle,
-				       drv_info, true);
-	if (!ret)
-		return ret;
+	drv_info->notif_enabled = true;
+	return;
 cleanup:
+	pr_info("Notification setup failed %d, not enabled\n", ret);
 	ffa_notifications_cleanup();
-	return ret;
 }
 
 static int __init ffa_init(void)
@@ -1483,17 +1504,18 @@ static int __init ffa_init(void)
 	mutex_init(&drv_info->rx_lock);
 	mutex_init(&drv_info->tx_lock);
 
-	ffa_setup_partitions();
-
 	ffa_set_up_mem_ops_native_flag();
 
-	ret = ffa_notifications_setup();
+	ffa_notifications_setup();
+
+	ffa_setup_partitions();
+
+	ret = ffa_sched_recv_cb_update(drv_info->vm_id, ffa_self_notif_handle,
+				       drv_info, true);
 	if (ret)
-		goto partitions_cleanup;
+		pr_info("Failed to register driver sched callback %d\n", ret);
 
 	return 0;
-partitions_cleanup:
-	ffa_partitions_cleanup();
 free_pages:
 	if (drv_info->tx_buffer)
 		free_pages_exact(drv_info->tx_buffer, RXTX_BUFFER_SIZE);
diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c
index c2435be0ae1b..e11555de99ab 100644
--- a/drivers/firmware/arm_scmi/perf.c
+++ b/drivers/firmware/arm_scmi/perf.c
@@ -152,7 +152,7 @@ struct perf_dom_info {
 	u32 opp_count;
 	u32 sustained_freq_khz;
 	u32 sustained_perf_level;
-	u32 mult_factor;
+	unsigned long mult_factor;
 	struct scmi_perf_domain_info info;
 	struct scmi_opp opp[MAX_OPPS];
 	struct scmi_fc_info *fc_info;
@@ -268,13 +268,14 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph,
 		dom_info->sustained_perf_level =
 					le32_to_cpu(attr->sustained_perf_level);
 		if (!dom_info->sustained_freq_khz ||
-		    !dom_info->sustained_perf_level)
+		    !dom_info->sustained_perf_level ||
+		    dom_info->level_indexing_mode)
 			/* CPUFreq converts to kHz, hence default 1000 */
 			dom_info->mult_factor =	1000;
 		else
 			dom_info->mult_factor =
-					(dom_info->sustained_freq_khz * 1000) /
-					dom_info->sustained_perf_level;
+					(dom_info->sustained_freq_khz * 1000UL)
+					/ dom_info->sustained_perf_level;
 		strscpy(dom_info->info.name, attr->name,
 			SCMI_SHORT_NAME_MAX_SIZE);
 	}
@@ -798,7 +799,7 @@ static int scmi_dvfs_device_opps_add(const struct scmi_protocol_handle *ph,
 		if (!dom->level_indexing_mode)
 			freq = dom->opp[idx].perf * dom->mult_factor;
 		else
-			freq = dom->opp[idx].indicative_freq * 1000;
+			freq = dom->opp[idx].indicative_freq * dom->mult_factor;
 
 		data.level = dom->opp[idx].perf;
 		data.freq = freq;
@@ -845,7 +846,8 @@ static int scmi_dvfs_freq_set(const struct scmi_protocol_handle *ph, u32 domain,
 	} else {
 		struct scmi_opp *opp;
 
-		opp = LOOKUP_BY_FREQ(dom->opps_by_freq, freq / 1000);
+		opp = LOOKUP_BY_FREQ(dom->opps_by_freq,
+				     freq / dom->mult_factor);
 		if (!opp)
 			return -EIO;
 
@@ -879,7 +881,7 @@ static int scmi_dvfs_freq_get(const struct scmi_protocol_handle *ph, u32 domain,
 		if (!opp)
 			return -EIO;
 
-		*freq = opp->indicative_freq * 1000;
+		*freq = opp->indicative_freq * dom->mult_factor;
 	}
 
 	return ret;
@@ -902,7 +904,7 @@ static int scmi_dvfs_est_power_get(const struct scmi_protocol_handle *ph,
 		if (!dom->level_indexing_mode)
 			opp_freq = opp->perf * dom->mult_factor;
 		else
-			opp_freq = opp->indicative_freq * 1000;
+			opp_freq = opp->indicative_freq * dom->mult_factor;
 
 		if (opp_freq < *freq)
 			continue;
diff --git a/drivers/firmware/efi/unaccepted_memory.c b/drivers/firmware/efi/unaccepted_memory.c
index 3f2f7bf6e335..5b439d04079c 100644
--- a/drivers/firmware/efi/unaccepted_memory.c
+++ b/drivers/firmware/efi/unaccepted_memory.c
@@ -101,7 +101,7 @@ retry:
 	 * overlap on physical address level.
 	 */
 	list_for_each_entry(entry, &accepting_list, list) {
-		if (entry->end < range.start)
+		if (entry->end <= range.start)
 			continue;
 		if (entry->start >= range.end)
 			continue;
diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c
index 6f309a3b2d9a..12d853845bb8 100644
--- a/drivers/gpio/gpiolib-sysfs.c
+++ b/drivers/gpio/gpiolib-sysfs.c
@@ -474,14 +474,17 @@ static ssize_t export_store(const struct class *class,
 		goto done;
 
 	status = gpiod_set_transitory(desc, false);
-	if (!status) {
-		status = gpiod_export(desc, true);
-		if (status < 0)
-			gpiod_free(desc);
-		else
-			set_bit(FLAG_SYSFS, &desc->flags);
+	if (status) {
+		gpiod_free(desc);
+		goto done;
 	}
 
+	status = gpiod_export(desc, true);
+	if (status < 0)
+		gpiod_free(desc);
+	else
+		set_bit(FLAG_SYSFS, &desc->flags);
+
 done:
 	if (status)
 		pr_debug("%s: status %d\n", __func__, status);
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 31cfe2c2a2af..2520db0b776e 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -276,6 +276,8 @@ source "drivers/gpu/drm/nouveau/Kconfig"
 
 source "drivers/gpu/drm/i915/Kconfig"
 
+source "drivers/gpu/drm/xe/Kconfig"
+
 source "drivers/gpu/drm/kmb/Kconfig"
 
 config DRM_VGEM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 8ac6f4b9546e..104b42df2e95 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -134,6 +134,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/
 obj-$(CONFIG_DRM_I915)	+= i915/
+obj-$(CONFIG_DRM_XE)	+= xe/
 obj-$(CONFIG_DRM_KMB_DISPLAY)  += kmb/
 obj-$(CONFIG_DRM_MGAG200) += mgag200/
 obj-$(CONFIG_DRM_V3D)  += v3d/
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2afecc55090f..260e32ef7bae 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
 	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
 	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
 	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-	amdgpu_ring_mux.o amdgpu_xcp.o
+	amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 02f4c6f9d4f6..576067d66bb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -330,6 +330,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
 {
 	struct list_head *reset_device_list = reset_context->reset_device_list;
 	struct amdgpu_device *tmp_adev = NULL;
+	struct amdgpu_ras *con;
 	int r;
 
 	if (reset_device_list == NULL)
@@ -355,7 +356,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
 		 */
 		amdgpu_register_gpu_instance(tmp_adev);
 
-		/* Resume RAS */
+		/* Resume RAS, ecc_irq */
+		con = amdgpu_ras_get_context(tmp_adev);
+		if (!amdgpu_sriov_vf(tmp_adev) && con) {
+			if (tmp_adev->sdma.ras &&
+				tmp_adev->sdma.ras->ras_block.ras_late_init) {
+				r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev,
+						&tmp_adev->sdma.ras->ras_block.ras_comm);
+				if (r) {
+					dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r);
+					goto end;
+				}
+			}
+
+			if (tmp_adev->gfx.ras &&
+				tmp_adev->gfx.ras->ras_block.ras_late_init) {
+				r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev,
+						&tmp_adev->gfx.ras->ras_block.ras_comm);
+				if (r) {
+					dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r);
+					goto end;
+				}
+			}
+		}
+
 		amdgpu_ras_resume(tmp_adev);
 
 		/* Update PSP FW topology after reset */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9d92ca157677..9da14436a373 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -109,6 +109,8 @@
 #include "amdgpu_mca.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_xcp.h"
+#include "amdgpu_seq64.h"
+#include "amdgpu_reg_state.h"
 
 #define MAX_GPU_INSTANCE		64
 
@@ -250,6 +252,10 @@ extern int amdgpu_seamless;
 extern int amdgpu_user_partt_mode;
 extern int amdgpu_agp;
 
+extern int amdgpu_wbrf;
+
+extern int fw_bo_location;
+
 #define AMDGPU_VM_MAX_NUM_CTX			4096
 #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
@@ -468,6 +474,7 @@ struct amdgpu_fpriv {
 	struct amdgpu_vm	vm;
 	struct amdgpu_bo_va	*prt_va;
 	struct amdgpu_bo_va	*csa_va;
+	struct amdgpu_bo_va	*seq64_va;
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
@@ -506,6 +513,31 @@ struct amdgpu_allowed_register_entry {
 	bool grbm_indexed;
 };
 
+/**
+ * enum amd_reset_method - Methods for resetting AMD GPU devices
+ *
+ * @AMD_RESET_METHOD_NONE: The device will not be reset.
+ * @AMD_RESET_LEGACY: Method reserved for SI, CIK and VI ASICs.
+ * @AMD_RESET_MODE0: Reset the entire ASIC. Not currently available for the
+ *                   any device.
+ * @AMD_RESET_MODE1: Resets all IP blocks on the ASIC (SDMA, GFX, VCN, etc.)
+ *                   individually. Suitable only for some discrete GPU, not
+ *                   available for all ASICs.
+ * @AMD_RESET_MODE2: Resets a lesser level of IPs compared to MODE1. Which IPs
+ *                   are reset depends on the ASIC. Notably doesn't reset IPs
+ *                   shared with the CPU on APUs or the memory controllers (so
+ *                   VRAM is not lost). Not available on all ASICs.
+ * @AMD_RESET_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card
+ *                  but without powering off the PCI bus. Suitable only for
+ *                  discrete GPUs.
+ * @AMD_RESET_PCI: Does a full bus reset using core Linux subsystem PCI reset
+ *                 and does a secondary bus reset or FLR, depending on what the
+ *                 underlying hardware supports.
+ *
+ * Methods available for AMD GPU driver for resetting the device. Not all
+ * methods are suitable for every device. User can override the method using
+ * module parameter `reset_method`.
+ */
 enum amd_reset_method {
 	AMD_RESET_METHOD_NONE = -1,
 	AMD_RESET_METHOD_LEGACY = 0,
@@ -585,6 +617,10 @@ struct amdgpu_asic_funcs {
 				  const struct amdgpu_video_codecs **codecs);
 	/* encode "> 32bits" smn addressing */
 	u64 (*encode_ext_smn_addressing)(int ext_id);
+
+	ssize_t (*get_reg_state)(struct amdgpu_device *adev,
+				 enum amdgpu_reg_state reg_state, void *buf,
+				 size_t max_size);
 };
 
 /*
@@ -757,6 +793,7 @@ struct amdgpu_mqd_prop {
 	uint64_t eop_gpu_addr;
 	uint32_t hqd_pipe_priority;
 	uint32_t hqd_queue_priority;
+	bool allow_tunneling;
 	bool hqd_active;
 };
 
@@ -986,6 +1023,9 @@ struct amdgpu_device {
 	/* GDS */
 	struct amdgpu_gds		gds;
 
+	/* for userq and VM fences */
+	struct amdgpu_seq64		seq64;
+
 	/* KFD */
 	struct amdgpu_kfd_dev		kfd;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index b8412202a1b0..067690ba7bff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -142,6 +142,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
 	int i;
 	int last_valid_bit;
+	int ret;
 
 	amdgpu_amdkfd_gpuvm_init_mem_limits();
 
@@ -160,6 +161,12 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 			.enable_mes = adev->enable_mes,
 		};
 
+		ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", NULL);
+		if (ret) {
+			dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
+			return;
+		}
+
 		/* this is going to have a few of the MSBs set that we need to
 		 * clear
 		 */
@@ -198,6 +205,10 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 
 		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
 							&gpu_resources);
+		if (adev->kfd.init_complete)
+			drm_client_register(&adev->kfd.client);
+		else
+			drm_client_release(&adev->kfd.client);
 
 		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
 
@@ -547,7 +558,7 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
 	struct amdgpu_device *adev = dst, *peer_adev;
 	int num_links;
 
-	if (adev->asic_type != CHIP_ALDEBARAN)
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2))
 		return 0;
 
 	if (src)
@@ -710,35 +721,6 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
 	return false;
 }
 
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-				     uint16_t vmid)
-{
-	if (adev->family == AMDGPU_FAMILY_AI) {
-		int i;
-
-		for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
-			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-	} else {
-		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
-	}
-
-	return 0;
-}
-
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-				      uint16_t pasid,
-				      enum TLB_FLUSH_TYPE flush_type,
-				      uint32_t inst)
-{
-	bool all_hub = false;
-
-	if (adev->family == AMDGPU_FAMILY_AI ||
-	    adev->family == AMDGPU_FAMILY_RV)
-		all_hub = true;
-
-	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst);
-}
-
 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
 {
 	return adev->have_atomics_support;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index dac983da961d..cf6ed5fce291 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -33,6 +33,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/memremap.h>
 #include <kgd_kfd_interface.h>
+#include <drm/drm_client.h>
 #include "amdgpu_sync.h"
 #include "amdgpu_vm.h"
 #include "amdgpu_xcp.h"
@@ -83,6 +84,7 @@ struct kgd_mem {
 
 	struct amdgpu_sync sync;
 
+	uint32_t gem_handle;
 	bool aql_queue;
 	bool is_imported;
 };
@@ -105,6 +107,9 @@ struct amdgpu_kfd_dev {
 
 	/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
 	struct dev_pagemap pgmap;
+
+	/* Client for KFD BO GEM handle allocations */
+	struct drm_client_dev client;
 };
 
 enum kgd_engine_type {
@@ -162,11 +167,6 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
 				uint32_t *ib_cmd, uint32_t ib_len);
 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-				uint16_t vmid);
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-				uint16_t pasid, enum TLB_FLUSH_TYPE flush_type,
-				uint32_t inst);
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
@@ -314,11 +314,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
 					    struct dma_fence **ef);
 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
 					      struct kfd_vm_fault_info *info);
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
-				      struct dma_buf *dmabuf,
-				      uint64_t va, void *drm_priv,
-				      struct kgd_mem **mem, uint64_t *size,
-				      uint64_t *mmap_offset);
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+					 uint64_t va, void *drm_priv,
+					 struct kgd_mem **mem, uint64_t *size,
+					 uint64_t *mmap_offset);
 int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
 				      struct dma_buf **dmabuf);
 void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 10d56979fe3b..899e31e3a5e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -200,7 +200,7 @@ int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+6+7+10)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 469785d33791..1ef758ac5076 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -90,7 +90,7 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
 		return NULL;
 
 	fence = container_of(f, struct amdgpu_amdkfd_fence, base);
-	if (fence && f->ops == &amdkfd_fence_ops)
+	if (f->ops == &amdkfd_fence_ops)
 		return fence;
 
 	return NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index f6598b9e4faa..a5c7259cf2a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -141,7 +141,7 @@ static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev,
 		(*dump)[i++][1] = RREG32(addr);         \
 	} while (0)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 6bf448ab3dff..ca4a6b82817f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -214,7 +214,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev,
 		(*dump)[i++][1] = RREG32(addr);		\
 	} while (0)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
@@ -301,7 +301,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+4)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index cd06e4a6d1da..0f3e2944edd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -238,7 +238,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev,
 		(*dump)[i++][1] = RREG32(addr);		\
 	} while (0)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
@@ -324,7 +324,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+4+2+3+7)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 00fbc0f44c92..5a35a8ca8922 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -363,7 +363,7 @@ int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
 		(*dump)[i++][1] = RREG32(addr);		\
 	} while (0)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
@@ -460,7 +460,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
 #undef HQD_N_REGS
 #define HQD_N_REGS (19+6+7+10)
 
-	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
 	if (*dump == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 41fbc4fd0fac..d17b2452cb1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -25,6 +25,7 @@
 #include <linux/pagemap.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
+#include <linux/fdtable.h>
 #include <drm/ttm/ttm_tt.h>
 
 #include <drm/drm_exec.h>
@@ -806,13 +807,22 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
 {
 	if (!mem->dmabuf) {
-		struct dma_buf *ret = amdgpu_gem_prime_export(
-			&mem->bo->tbo.base,
+		struct amdgpu_device *bo_adev;
+		struct dma_buf *dmabuf;
+		int r, fd;
+
+		bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+		r = drm_gem_prime_handle_to_fd(&bo_adev->ddev, bo_adev->kfd.client.file,
+					       mem->gem_handle,
 			mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
-				DRM_RDWR : 0);
-		if (IS_ERR(ret))
-			return PTR_ERR(ret);
-		mem->dmabuf = ret;
+					       DRM_RDWR : 0, &fd);
+		if (r)
+			return r;
+		dmabuf = dma_buf_get(fd);
+		close_fd(fd);
+		if (WARN_ON_ONCE(IS_ERR(dmabuf)))
+			return PTR_ERR(dmabuf);
+		mem->dmabuf = dmabuf;
 	}
 
 	return 0;
@@ -1137,7 +1147,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 
 	ctx->n_vms = 1;
 	ctx->sync = &mem->sync;
-	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&ctx->exec) {
 		ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
 		drm_exec_retry_on_contention(&ctx->exec);
@@ -1176,7 +1186,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	int ret;
 
 	ctx->sync = &mem->sync;
-	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&ctx->exec) {
 		ctx->n_vms = 0;
 		list_for_each_entry(entry, &mem->attachments, list) {
@@ -1384,7 +1394,6 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 				  amdgpu_amdkfd_restore_userptr_worker);
 
 		*process_info = info;
-		*ef = dma_fence_get(&info->eviction_fence->base);
 	}
 
 	vm->process_info = *process_info;
@@ -1415,6 +1424,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 	list_add_tail(&vm->vm_list_node,
 			&(vm->process_info->vm_list_head));
 	vm->process_info->n_vms++;
+
+	*ef = dma_fence_get(&vm->process_info->eviction_fence->base);
 	mutex_unlock(&vm->process_info->lock);
 
 	return 0;
@@ -1426,10 +1437,7 @@ validate_pd_fail:
 reserve_pd_fail:
 	vm->process_info = NULL;
 	if (info) {
-		/* Two fence references: one in info and one in *ef */
 		dma_fence_put(&info->eviction_fence->base);
-		dma_fence_put(*ef);
-		*ef = NULL;
 		*process_info = NULL;
 		put_pid(info->pid);
 create_evict_fence_fail:
@@ -1623,7 +1631,8 @@ int amdgpu_amdkfd_criu_resume(void *p)
 		goto out_unlock;
 	}
 	WRITE_ONCE(pinfo->block_mmu_notifications, false);
-	schedule_delayed_work(&pinfo->restore_userptr_work, 0);
+	queue_delayed_work(system_freezable_wq,
+			   &pinfo->restore_userptr_work, 0);
 
 out_unlock:
 	mutex_unlock(&pinfo->lock);
@@ -1779,6 +1788,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		pr_debug("Failed to allow vma node access. ret %d\n", ret);
 		goto err_node_allow;
 	}
+	ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle);
+	if (ret)
+		goto err_gem_handle_create;
 	bo = gem_to_amdgpu_bo(gobj);
 	if (bo_type == ttm_bo_type_sg) {
 		bo->tbo.sg = sg;
@@ -1830,6 +1842,8 @@ allocate_init_user_pages_failed:
 err_pin_bo:
 err_validate_bo:
 	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+	drm_gem_handle_delete(adev->kfd.client.file, (*mem)->gem_handle);
+err_gem_handle_create:
 	drm_vma_node_revoke(&gobj->vma_node, drm_priv);
 err_node_allow:
 	/* Don't unreserve system mem limit twice */
@@ -1942,8 +1956,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 
 	/* Free the BO*/
 	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
-	if (mem->dmabuf)
+	drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle);
+	if (mem->dmabuf) {
 		dma_buf_put(mem->dmabuf);
+		mem->dmabuf = NULL;
+	}
 	mutex_destroy(&mem->lock);
 
 	/* If this releases the last reference, it will end up calling
@@ -2295,34 +2312,26 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
 	return 0;
 }
 
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
-				      struct dma_buf *dma_buf,
-				      uint64_t va, void *drm_priv,
-				      struct kgd_mem **mem, uint64_t *size,
-				      uint64_t *mmap_offset)
+static int import_obj_create(struct amdgpu_device *adev,
+			     struct dma_buf *dma_buf,
+			     struct drm_gem_object *obj,
+			     uint64_t va, void *drm_priv,
+			     struct kgd_mem **mem, uint64_t *size,
+			     uint64_t *mmap_offset)
 {
 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
-	struct drm_gem_object *obj;
 	struct amdgpu_bo *bo;
 	int ret;
 
-	obj = amdgpu_gem_prime_import(adev_to_drm(adev), dma_buf);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
 	bo = gem_to_amdgpu_bo(obj);
 	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
-				    AMDGPU_GEM_DOMAIN_GTT))) {
+				    AMDGPU_GEM_DOMAIN_GTT)))
 		/* Only VRAM and GTT BOs are supported */
-		ret = -EINVAL;
-		goto err_put_obj;
-	}
+		return -EINVAL;
 
 	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-	if (!*mem) {
-		ret = -ENOMEM;
-		goto err_put_obj;
-	}
+	if (!*mem)
+		return -ENOMEM;
 
 	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
 	if (ret)
@@ -2372,8 +2381,41 @@ err_remove_mem:
 	drm_vma_node_revoke(&obj->vma_node, drm_priv);
 err_free_mem:
 	kfree(*mem);
+	return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+					 uint64_t va, void *drm_priv,
+					 struct kgd_mem **mem, uint64_t *size,
+					 uint64_t *mmap_offset)
+{
+	struct drm_gem_object *obj;
+	uint32_t handle;
+	int ret;
+
+	ret = drm_gem_prime_fd_to_handle(&adev->ddev, adev->kfd.client.file, fd,
+					 &handle);
+	if (ret)
+		return ret;
+	obj = drm_gem_object_lookup(adev->kfd.client.file, handle);
+	if (!obj) {
+		ret = -EINVAL;
+		goto err_release_handle;
+	}
+
+	ret = import_obj_create(adev, obj->dma_buf, obj, va, drm_priv, mem, size,
+				mmap_offset);
+	if (ret)
+		goto err_put_obj;
+
+	(*mem)->gem_handle = handle;
+
+	return 0;
+
 err_put_obj:
 	drm_gem_object_put(obj);
+err_release_handle:
+	drm_gem_handle_delete(adev->kfd.client.file, handle);
 	return ret;
 }
 
@@ -2426,7 +2468,8 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
 				       KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
 		if (r)
 			pr_err("Failed to quiesce KFD\n");
-		schedule_delayed_work(&process_info->restore_userptr_work,
+		queue_delayed_work(system_freezable_wq,
+			&process_info->restore_userptr_work,
 			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
 	}
 	mutex_unlock(&process_info->notifier_lock);
@@ -2552,7 +2595,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 
 	amdgpu_sync_create(&sync);
 
-	drm_exec_init(&exec, 0);
+	drm_exec_init(&exec, 0, 0);
 	/* Reserve all BOs and page tables for validation */
 	drm_exec_until_all_locked(&exec) {
 		/* Reserve all the page directories */
@@ -2749,7 +2792,8 @@ unlock_out:
 
 	/* If validation failed, reschedule another attempt */
 	if (evicted_bos) {
-		schedule_delayed_work(&process_info->restore_userptr_work,
+		queue_delayed_work(system_freezable_wq,
+			&process_info->restore_userptr_work,
 			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
 
 		kfd_smi_event_queue_restore_rescheduled(mm);
@@ -2758,6 +2802,23 @@ unlock_out:
 	put_task_struct(usertask);
 }
 
+static void replace_eviction_fence(struct dma_fence **ef,
+				   struct dma_fence *new_ef)
+{
+	struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true
+		/* protected by process_info->lock */);
+
+	/* If we're replacing an unsignaled eviction fence, that fence will
+	 * never be signaled, and if anyone is still waiting on that fence,
+	 * they will hang forever. This should never happen. We should only
+	 * replace the fence in restore_work that only gets scheduled after
+	 * eviction work signaled the fence.
+	 */
+	WARN_ONCE(!dma_fence_is_signaled(old_ef),
+		  "Replacing unsignaled eviction fence");
+	dma_fence_put(old_ef);
+}
+
 /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
  *   KFD process identified by process_info
  *
@@ -2781,7 +2842,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	struct amdkfd_process_info *process_info = info;
 	struct amdgpu_vm *peer_vm;
 	struct kgd_mem *mem;
-	struct amdgpu_amdkfd_fence *new_fence;
 	struct list_head duplicate_save;
 	struct amdgpu_sync sync_obj;
 	unsigned long failed_size = 0;
@@ -2793,7 +2853,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 
 	mutex_lock(&process_info->lock);
 
-	drm_exec_init(&exec, 0);
+	drm_exec_init(&exec, 0, 0);
 	drm_exec_until_all_locked(&exec) {
 		list_for_each_entry(peer_vm, &process_info->vm_list_head,
 				    vm_list_node) {
@@ -2825,12 +2885,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	if (ret)
 		goto validate_map_fail;
 
-	ret = process_sync_pds_resv(process_info, &sync_obj);
-	if (ret) {
-		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
-		goto validate_map_fail;
-	}
-
 	/* Validate BOs and map them to GPUVM (update VM page tables). */
 	list_for_each_entry(mem, &process_info->kfd_bo_list,
 			    validate_list) {
@@ -2881,6 +2935,19 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	if (failed_size)
 		pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
 
+	/* Update mappings not managed by KFD */
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			vm_list_node) {
+		struct amdgpu_device *adev = amdgpu_ttm_adev(
+			peer_vm->root.bo->tbo.bdev);
+
+		ret = amdgpu_vm_handle_moved(adev, peer_vm, &exec.ticket);
+		if (ret) {
+			pr_debug("Memory eviction: handle moved failed. Try again\n");
+			goto validate_map_fail;
+		}
+	}
+
 	/* Update page directories */
 	ret = process_update_pds(process_info, &sync_obj);
 	if (ret) {
@@ -2888,25 +2955,47 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 		goto validate_map_fail;
 	}
 
+	/* Sync with fences on all the page tables. They implicitly depend on any
+	 * move fences from amdgpu_vm_handle_moved above.
+	 */
+	ret = process_sync_pds_resv(process_info, &sync_obj);
+	if (ret) {
+		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
+		goto validate_map_fail;
+	}
+
 	/* Wait for validate and PT updates to finish */
 	amdgpu_sync_wait(&sync_obj, false);
 
-	/* Release old eviction fence and create new one, because fence only
-	 * goes from unsignaled to signaled, fence cannot be reused.
-	 * Use context and mm from the old fence.
+	/* The old eviction fence may be unsignaled if restore happens
+	 * after a GPU reset or suspend/resume. Keep the old fence in that
+	 * case. Otherwise release the old eviction fence and create new
+	 * one, because fence only goes from unsignaled to signaled once
+	 * and cannot be reused. Use context and mm from the old fence.
+	 *
+	 * If an old eviction fence signals after this check, that's OK.
+	 * Anyone signaling an eviction fence must stop the queues first
+	 * and schedule another restore worker.
 	 */
-	new_fence = amdgpu_amdkfd_fence_create(
+	if (dma_fence_is_signaled(&process_info->eviction_fence->base)) {
+		struct amdgpu_amdkfd_fence *new_fence =
+			amdgpu_amdkfd_fence_create(
 				process_info->eviction_fence->base.context,
 				process_info->eviction_fence->mm,
 				NULL);
-	if (!new_fence) {
-		pr_err("Failed to create eviction fence\n");
-		ret = -ENOMEM;
-		goto validate_map_fail;
+
+		if (!new_fence) {
+			pr_err("Failed to create eviction fence\n");
+			ret = -ENOMEM;
+			goto validate_map_fail;
+		}
+		dma_fence_put(&process_info->eviction_fence->base);
+		process_info->eviction_fence = new_fence;
+		replace_eviction_fence(ef, dma_fence_get(&new_fence->base));
+	} else {
+		WARN_ONCE(*ef != &process_info->eviction_fence->base,
+			  "KFD eviction fence doesn't match KGD process_info");
 	}
-	dma_fence_put(&process_info->eviction_fence->base);
-	process_info->eviction_fence = new_fence;
-	*ef = dma_fence_get(&new_fence->base);
 
 	/* Attach new eviction fence to all BOs except pinned ones */
 	list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 7473a42f7d45..9caba10315a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -103,7 +103,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
 	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 	struct amdgpu_connector_atom_dig *dig_connector;
 	int bpc = 8;
-	unsigned mode_clock, max_tmds_clock;
+	unsigned int mode_clock, max_tmds_clock;
 
 	switch (connector->connector_type) {
 	case DRM_MODE_CONNECTOR_DVII:
@@ -255,6 +255,7 @@ struct edid *amdgpu_connector_edid(struct drm_connector *connector)
 		return amdgpu_connector->edid;
 	} else if (edid_blob) {
 		struct edid *edid = kmemdup(edid_blob->data, edid_blob->length, GFP_KERNEL);
+
 		if (edid)
 			amdgpu_connector->edid = edid;
 	}
@@ -581,6 +582,7 @@ static int amdgpu_connector_set_property(struct drm_connector *connector,
 			amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
 		} else {
 			const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
 			amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
 		}
 
@@ -797,6 +799,7 @@ static int amdgpu_connector_set_lcd_property(struct drm_connector *connector,
 		amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
 	else {
 		const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
 		amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
 	}
 
@@ -979,6 +982,41 @@ amdgpu_connector_check_hpd_status_unchanged(struct drm_connector *connector)
 	return false;
 }
 
+static void amdgpu_connector_shared_ddc(enum drm_connector_status *status,
+					struct drm_connector *connector,
+					struct amdgpu_connector *amdgpu_connector)
+{
+	struct drm_connector *list_connector;
+	struct drm_connector_list_iter iter;
+	struct amdgpu_connector *list_amdgpu_connector;
+	struct drm_device *dev = connector->dev;
+	struct amdgpu_device *adev = drm_to_adev(dev);
+
+	if (amdgpu_connector->shared_ddc && *status == connector_status_connected) {
+		drm_connector_list_iter_begin(dev, &iter);
+		drm_for_each_connector_iter(list_connector,
+					    &iter) {
+			if (connector == list_connector)
+				continue;
+			list_amdgpu_connector = to_amdgpu_connector(list_connector);
+			if (list_amdgpu_connector->shared_ddc &&
+			    list_amdgpu_connector->ddc_bus->rec.i2c_id ==
+			     amdgpu_connector->ddc_bus->rec.i2c_id) {
+				/* cases where both connectors are digital */
+				if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
+					/* hpd is our only option in this case */
+					if (!amdgpu_display_hpd_sense(adev,
+								      amdgpu_connector->hpd.hpd)) {
+						amdgpu_connector_free_edid(connector);
+						*status = connector_status_disconnected;
+					}
+				}
+			}
+		}
+		drm_connector_list_iter_end(&iter);
+	}
+}
+
 /*
  * DVI is complicated
  * Do a DDC probe, if DDC probe passes, get the full EDID so
@@ -1065,32 +1103,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
 			 * DDC line.  The latter is more complex because with DVI<->HDMI adapters
 			 * you don't really know what's connected to which port as both are digital.
 			 */
-			if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) {
-				struct drm_connector *list_connector;
-				struct drm_connector_list_iter iter;
-				struct amdgpu_connector *list_amdgpu_connector;
-
-				drm_connector_list_iter_begin(dev, &iter);
-				drm_for_each_connector_iter(list_connector,
-							    &iter) {
-					if (connector == list_connector)
-						continue;
-					list_amdgpu_connector = to_amdgpu_connector(list_connector);
-					if (list_amdgpu_connector->shared_ddc &&
-					    (list_amdgpu_connector->ddc_bus->rec.i2c_id ==
-					     amdgpu_connector->ddc_bus->rec.i2c_id)) {
-						/* cases where both connectors are digital */
-						if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
-							/* hpd is our only option in this case */
-							if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
-								amdgpu_connector_free_edid(connector);
-								ret = connector_status_disconnected;
-							}
-						}
-					}
-				}
-				drm_connector_list_iter_end(&iter);
-			}
+			amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector);
 		}
 	}
 
@@ -1192,6 +1205,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
 static void amdgpu_connector_dvi_force(struct drm_connector *connector)
 {
 	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
 	if (connector->force == DRM_FORCE_ON)
 		amdgpu_connector->use_digital = false;
 	if (connector->force == DRM_FORCE_ON_DIGITAL)
@@ -1426,6 +1440,7 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
 				ret = connector_status_connected;
 			else if (amdgpu_connector->dac_load_detect) { /* try load detection */
 				const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+
 				ret = encoder_funcs->detect(encoder, connector);
 			}
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e50be6500030..6adeddfb3d56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -66,7 +66,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
 
 	amdgpu_sync_create(&p->sync);
 	drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
-		      DRM_EXEC_IGNORE_DUPLICATES);
+		      DRM_EXEC_IGNORE_DUPLICATES, 0);
 	return 0;
 }
 
@@ -870,9 +870,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		struct amdgpu_bo *bo = e->bo;
 		int i;
 
-		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
-					sizeof(struct page *),
-					GFP_KERNEL | __GFP_ZERO);
+		e->user_pages = kvcalloc(bo->tbo.ttm->num_pages,
+					 sizeof(struct page *),
+					 GFP_KERNEL);
 		if (!e->user_pages) {
 			DRM_ERROR("kvmalloc_array failure\n");
 			r = -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 720011019741..796fa6f1420b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -70,7 +70,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	struct drm_exec exec;
 	int r;
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = amdgpu_vm_lock_pd(vm, &exec, 0);
 		if (likely(!r))
@@ -110,7 +110,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	struct drm_exec exec;
 	int r;
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = amdgpu_vm_lock_pd(vm, &exec, 0);
 		if (likely(!r))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c1efa13bccbb..e485dd3357c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -540,7 +540,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 	while (size) {
 		uint32_t value;
 
-		value = RREG32_PCIE(*pos);
+		if (upper_32_bits(*pos))
+			value = RREG32_PCIE_EXT(*pos);
+		else
+			value = RREG32_PCIE(*pos);
+
 		r = put_user(value, (uint32_t *)buf);
 		if (r)
 			goto out;
@@ -600,7 +604,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 		if (r)
 			goto out;
 
-		WREG32_PCIE(*pos, value);
+		if (upper_32_bits(*pos))
+			WREG32_PCIE_EXT(*pos, value);
+		else
+			WREG32_PCIE(*pos, value);
 
 		result += 4;
 		buf += 4;
@@ -638,6 +645,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	if (!adev->didt_rreg)
+		return -EOPNOTSUPP;
+
 	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
 	if (r < 0) {
 		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -694,6 +704,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	if (!adev->didt_wreg)
+		return -EOPNOTSUPP;
+
 	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
 	if (r < 0) {
 		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -749,7 +762,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 	int r;
 
 	if (!adev->smc_rreg)
-		return -EPERM;
+		return -EOPNOTSUPP;
 
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
@@ -808,7 +821,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	int r;
 
 	if (!adev->smc_wreg)
-		return -EPERM;
+		return -EOPNOTSUPP;
 
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
@@ -2141,6 +2154,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
 	amdgpu_debugfs_firmware_init(adev);
 	amdgpu_ta_if_debugfs_init(adev);
 
+	amdgpu_debugfs_mes_event_log_init(adev);
+
 #if defined(CONFIG_DRM_AMD_DC)
 	if (adev->dc_enabled)
 		dtn_debugfs_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index 371a6f0deb29..0425432d8659 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
 void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2a6684a38714..5bb444bb36ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -162,6 +162,65 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
 static DEVICE_ATTR(pcie_replay_count, 0444,
 		amdgpu_device_get_pcie_replay_count, NULL);
 
+static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
+					  struct bin_attribute *attr, char *buf,
+					  loff_t ppos, size_t count)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	ssize_t bytes_read;
+
+	switch (ppos) {
+	case AMDGPU_SYS_REG_STATE_XGMI:
+		bytes_read = amdgpu_asic_get_reg_state(
+			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
+		break;
+	case AMDGPU_SYS_REG_STATE_WAFL:
+		bytes_read = amdgpu_asic_get_reg_state(
+			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
+		break;
+	case AMDGPU_SYS_REG_STATE_PCIE:
+		bytes_read = amdgpu_asic_get_reg_state(
+			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
+		break;
+	case AMDGPU_SYS_REG_STATE_USR:
+		bytes_read = amdgpu_asic_get_reg_state(
+			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
+		break;
+	case AMDGPU_SYS_REG_STATE_USR_1:
+		bytes_read = amdgpu_asic_get_reg_state(
+			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return bytes_read;
+}
+
+BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
+	 AMDGPU_SYS_REG_STATE_END);
+
+int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
+{
+	int ret;
+
+	if (!amdgpu_asic_get_reg_state_supported(adev))
+		return 0;
+
+	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+
+	return ret;
+}
+
+void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
+{
+	if (!amdgpu_asic_get_reg_state_supported(adev))
+		return;
+	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+}
+
 /**
  * DOC: board_info
  *
@@ -1540,7 +1599,7 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
 	if (adev->mman.keep_stolen_vga_memory)
 		return false;
 
-	return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
+	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
 }
 
 /*
@@ -1551,11 +1610,15 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
  */
-static bool amdgpu_device_pcie_dynamic_switching_supported(void)
+static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
 {
 #if IS_ENABLED(CONFIG_X86)
 	struct cpuinfo_x86 *c = &cpu_data(0);
 
+	/* eGPU change speeds based on USB4 fabric conditions */
+	if (dev_is_removable(adev->dev))
+		return true;
+
 	if (c->x86_vendor == X86_VENDOR_INTEL)
 		return false;
 #endif
@@ -2188,15 +2251,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 
 	adev->firmware.gpu_info_fw = NULL;
 
-	if (adev->mman.discovery_bin) {
-		/*
-		 * FIXME: The bounding box is still needed by Navi12, so
-		 * temporarily read it from gpu_info firmware. Should be dropped
-		 * when DAL no longer needs it.
-		 */
-		if (adev->asic_type != CHIP_NAVI12)
-			return 0;
-	}
+	if (adev->mman.discovery_bin)
+		return 0;
 
 	switch (adev->asic_type) {
 	default:
@@ -2395,7 +2451,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
-	if (!amdgpu_device_pcie_dynamic_switching_supported())
+	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
 
 	total = true;
@@ -2676,6 +2732,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 					goto init_failed;
 				}
 			}
+
+			r = amdgpu_seq64_init(adev);
+			if (r) {
+				DRM_ERROR("allocate seq64 failed %d\n", r);
+				goto init_failed;
+			}
 		}
 	}
 
@@ -3138,6 +3200,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 			amdgpu_device_wb_fini(adev);
 			amdgpu_device_mem_scratch_fini(adev);
 			amdgpu_ib_pool_fini(adev);
+			amdgpu_seq64_fini(adev);
 		}
 
 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
@@ -3791,10 +3854,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
 		adev->gfx.mcbp = true;
 	else if (amdgpu_mcbp == 0)
 		adev->gfx.mcbp = false;
-	else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
-		 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
-		 adev->gfx.num_gfx_rings)
-		adev->gfx.mcbp = true;
 
 	if (amdgpu_sriov_vf(adev))
 		adev->gfx.mcbp = true;
@@ -4222,6 +4281,7 @@ fence_driver_init:
 			"Could not create amdgpu board attributes\n");
 
 	amdgpu_fru_sysfs_init(adev);
+	amdgpu_reg_state_sysfs_init(adev);
 
 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
 		r = amdgpu_pmu_init(adev);
@@ -4344,6 +4404,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
 	amdgpu_fru_sysfs_fini(adev);
 
+	amdgpu_reg_state_sysfs_fini(adev);
+
 	/* disable ras feature must before hw fini */
 	amdgpu_ras_pre_fini(adev);
 
@@ -4520,8 +4582,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 
 	amdgpu_ras_suspend(adev);
 
-	amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
 	amdgpu_device_ip_suspend_phase1(adev);
 
 	if (!adev->in_s0ix)
@@ -4531,6 +4591,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 	if (r)
 		return r;
 
+	amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
 	amdgpu_fence_driver_hw_fini(adev);
 
 	amdgpu_device_ip_suspend_phase2(adev);
@@ -4538,6 +4600,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 	if (amdgpu_sriov_vf(adev))
 		amdgpu_virt_release_full_gpu(adev, false);
 
+	r = amdgpu_dpm_notify_rlc_state(adev, false);
+	if (r)
+		return r;
+
 	return 0;
 }
 
@@ -5731,6 +5797,39 @@ recover_end:
 }
 
 /**
+ * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: pointer to the speed of the link
+ * @width: pointer to the width of the link
+ *
+ * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
+ * first physical partner to an AMD dGPU.
+ * This will exclude any virtual switches and links.
+ */
+static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
+					    enum pci_bus_speed *speed,
+					    enum pcie_link_width *width)
+{
+	struct pci_dev *parent = adev->pdev;
+
+	if (!speed || !width)
+		return;
+
+	*speed = PCI_SPEED_UNKNOWN;
+	*width = PCIE_LNK_WIDTH_UNKNOWN;
+
+	while ((parent = pci_upstream_bridge(parent))) {
+		/* skip upstream/downstream switches internal to dGPU*/
+		if (parent->vendor == PCI_VENDOR_ID_ATI)
+			continue;
+		*speed = pcie_get_speed_cap(parent);
+		*width = pcie_get_width_cap(parent);
+		break;
+	}
+}
+
+/**
  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
  *
  * @adev: amdgpu_device pointer
@@ -5763,8 +5862,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
 		return;
 
-	pcie_bandwidth_available(adev->pdev, NULL,
-				 &platform_speed_cap, &platform_link_width);
+	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
+					&platform_link_width);
 
 	if (adev->pm.pcie_gen_mask == 0) {
 		/* asic caps */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 0cacd0b9f8be..b8fbe97efe1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -340,14 +340,11 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
 		adev->have_disp_power_ref = true;
 		return ret;
 	}
-	/* if we have no active crtcs, then drop the power ref
-	 * we got before
+	/* if we have no active crtcs, then go to
+	 * drop the power ref we got before
 	 */
-	if (!active && adev->have_disp_power_ref) {
-		pm_runtime_put_autosuspend(dev->dev);
+	if (!active && adev->have_disp_power_ref)
 		adev->have_disp_power_ref = false;
-	}
-
 out:
 	/* drop the power reference we got coming in here */
 	pm_runtime_put_autosuspend(dev->dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index e7e87a3b2601..decbbe3d4f06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -42,6 +42,7 @@
 #include <linux/dma-fence-array.h>
 #include <linux/pci-p2pdma.h>
 #include <linux/pm_runtime.h>
+#include "amdgpu_trace.h"
 
 /**
  * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
@@ -63,6 +64,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
 		attach->peer2peer = false;
 
 	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+	trace_amdgpu_runpm_reference_dumps(1, __func__);
 	if (r < 0)
 		goto out;
 
@@ -70,6 +72,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
 
 out:
 	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+	trace_amdgpu_runpm_reference_dumps(0, __func__);
 	return r;
 }
 
@@ -90,6 +93,7 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
 
 	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+	trace_amdgpu_runpm_reference_dumps(0, __func__);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8f24cabe2155..852cec98ff26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -115,9 +115,10 @@
  *   3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
  * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query
  * - 3.56.0 - Update IB start address and size alignment for decode and encode
+ * - 3.57.0 - Compute tunneling on GFX10+
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	56
+#define KMS_DRIVER_MINOR	57
 #define KMS_DRIVER_PATCHLEVEL	0
 
 /*
@@ -208,6 +209,8 @@ int amdgpu_umsch_mm;
 int amdgpu_seamless = -1; /* auto */
 uint amdgpu_debug_mask;
 int amdgpu_agp = -1; /* auto */
+int amdgpu_wbrf = -1;
+int fw_bo_location = -1;
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -971,6 +974,26 @@ module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
 MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)");
 module_param_named(agp, amdgpu_agp, int, 0444);
 
+/**
+ * DOC: wbrf (int)
+ * Enable Wifi RFI interference mitigation feature.
+ * Due to electrical and mechanical constraints there may be likely interference of
+ * relatively high-powered harmonics of the (G-)DDR memory clocks with local radio
+ * module frequency bands used by Wifi 6/6e/7. To mitigate the possible RFI interference,
+ * with this feature enabled, PMFW will use either “shadowed P-State” or “P-State” based
+ * on active list of frequencies in-use (to be avoided) as part of initial setting or
+ * P-state transition. However, there may be potential performance impact with this
+ * feature enabled.
+ * (0 = disabled, 1 = enabled, -1 = auto (default setting, will be enabled if supported))
+ */
+MODULE_PARM_DESC(wbrf,
+	"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)");
+module_param_named(wbrf, amdgpu_wbrf, int, 0444);
+
+MODULE_PARM_DESC(fw_bo_location,
+	"location to put firmware bo for frontdoor loading (-1 = auto (default), 0 = on ram, 1 = on vram");
+module_param(fw_bo_location, int, 0644);
+
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
@@ -2263,6 +2286,8 @@ retry_init:
 		pm_runtime_mark_last_busy(ddev->dev);
 		pm_runtime_put_autosuspend(ddev->dev);
 
+		pci_wake_from_d3(pdev, TRUE);
+
 		/*
 		 * For runpm implemented via BACO, PMFW will handle the
 		 * timing for BACO in and out:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index dc230212746a..70bff8cecfda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -183,6 +183,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       seq, flags | AMDGPU_FENCE_FLAG_INT);
 	pm_runtime_get_noresume(adev_to_drm(adev)->dev);
+	trace_amdgpu_runpm_reference_dumps(1, __func__);
 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	if (unlikely(rcu_dereference_protected(*ptr, 1))) {
 		struct dma_fence *old;
@@ -310,6 +311,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 		dma_fence_put(fence);
 		pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+		trace_amdgpu_runpm_reference_dumps(0, __func__);
 	} while (last_seq != seq);
 
 	return true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 84beeaa4d21c..49a5f1c73b3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -203,7 +203,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	struct drm_exec exec;
 	long r;
 
-	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES);
+	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
 		drm_exec_retry_on_contention(&exec);
@@ -739,7 +739,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	}
 
 	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
-		      DRM_EXEC_IGNORE_DUPLICATES);
+		      DRM_EXEC_IGNORE_DUPLICATES, 0);
 	drm_exec_until_all_locked(&exec) {
 		if (gobj) {
 			r = drm_exec_lock_obj(&exec, gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 5f71414190e9..d2f273d77e59 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -181,6 +181,9 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 
+	if (!bo->ttm)
+		return AMDGPU_BO_INVALID_OFFSET;
+
 	if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached)
 		return AMDGPU_BO_INVALID_OFFSET;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index 081267161d40..55b65fc04b65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -190,8 +190,8 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 		pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
 			hmm_range->start, hmm_range->end);
 
-		/* Assuming 128MB takes maximum 1 second to fault page address */
-		timeout = max((hmm_range->end - hmm_range->start) >> 27, 1UL);
+		/* Assuming 64MB takes maximum 1 second to fault page address */
+		timeout = max((hmm_range->end - hmm_range->start) >> 26, 1UL);
 		timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
 		timeout = jiffies + msecs_to_jiffies(timeout);
 
@@ -199,6 +199,7 @@ retry:
 		hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
 		r = hmm_range_fault(hmm_range);
 		if (unlikely(r)) {
+			schedule();
 			/*
 			 * FIXME: This timeout should encompass the retry from
 			 * mmu_interval_read_retry() as well.
@@ -212,7 +213,6 @@ retry:
 			break;
 		hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
 		hmm_range->start = hmm_range->end;
-		schedule();
 	} while (hmm_range->end < end);
 
 	hmm_range->start = start;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 583cf03950cd..b5ebafd4a3ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1428,6 +1428,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 		fpriv->csa_va = NULL;
 	}
 
+	amdgpu_seq64_unmap(adev, fpriv);
+
 	pasid = fpriv->vm.pasid;
 	pd = amdgpu_bo_ref(fpriv->vm.root.bo);
 	if (!WARN_ON(amdgpu_bo_reserve(pd, true))) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index cf33eb219e25..59fafb8392e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -218,6 +218,7 @@ static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, st
 int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data)
 {
 	struct amdgpu_smuio_mcm_config_info mcm_info;
+	struct ras_err_addr err_addr = {0};
 	struct mca_bank_set mca_set;
 	struct mca_bank_node *node;
 	struct mca_bank_entry *entry;
@@ -246,10 +247,18 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo
 		mcm_info.socket_id = entry->info.socket_id;
 		mcm_info.die_id = entry->info.aid;
 
+		if (blk == AMDGPU_RAS_BLOCK__UMC) {
+			err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS];
+			err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID];
+			err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR];
+		}
+
 		if (type == AMDGPU_MCA_ERROR_TYPE_UE)
-			amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, (uint64_t)count);
+			amdgpu_ras_error_statistic_ue_count(err_data,
+				&mcm_info, &err_addr, (uint64_t)count);
 		else
-			amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, (uint64_t)count);
+			amdgpu_ras_error_statistic_ce_count(err_data,
+				&mcm_info, &err_addr, (uint64_t)count);
 	}
 
 out_mca_release:
@@ -351,6 +360,9 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err
 	const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
 	int count;
 
+	if (!mca_funcs || !mca_funcs->mca_get_mca_entry)
+		return -EOPNOTSUPP;
+
 	switch (type) {
 	case AMDGPU_MCA_ERROR_TYPE_UE:
 		count = mca_funcs->max_ue_count;
@@ -365,10 +377,7 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err
 	if (idx >= count)
 		return -EINVAL;
 
-	if (mca_funcs && mca_funcs->mca_get_mca_entry)
-		return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);
-
-	return -EOPNOTSUPP;
+	return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);
 }
 
 #if defined(CONFIG_DEBUG_FS)
@@ -377,7 +386,7 @@ static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val)
 	struct amdgpu_device *adev = (struct amdgpu_device *)data;
 	int ret;
 
-	ret = amdgpu_mca_smu_set_debug_mode(adev, val ? true : false);
+	ret = amdgpu_ras_set_mca_debug_mode(adev, val ? true : false);
 	if (ret)
 		return ret;
 
@@ -485,7 +494,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_se
 void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
 {
 #if defined(CONFIG_DEBUG_FS)
-	if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6))
+	if (!root || amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 6))
 		return;
 
 	debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index 2b488fcf2f95..b399f1b62887 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -46,6 +46,10 @@
 #define MCA_REG__STATUS__ERRORCODEEXT(x)	MCA_REG_FIELD(x, 21, 16)
 #define MCA_REG__STATUS__ERRORCODE(x)		MCA_REG_FIELD(x, 15, 0)
 
+#define MCA_REG__MISC0__ERRCNT(x)		MCA_REG_FIELD(x, 43, 32)
+
+#define MCA_REG__SYND__ERRORINFORMATION(x)	MCA_REG_FIELD(x, 17, 0)
+
 enum amdgpu_mca_ip {
 	AMDGPU_MCA_IP_UNKNOW = -1,
 	AMDGPU_MCA_IP_PSP = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 9ddbf1494326..da48b6da0107 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -98,6 +98,26 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
+{
+	int r;
+
+	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_GTT,
+				    &adev->mes.event_log_gpu_obj,
+				    &adev->mes.event_log_gpu_addr,
+				    &adev->mes.event_log_cpu_addr);
+	if (r) {
+		dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r);
+		return r;
+	}
+
+	memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
+
+	return  0;
+
+}
+
 static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
 {
 	bitmap_free(adev->mes.doorbell_bitmap);
@@ -182,8 +202,14 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
 	if (r)
 		goto error;
 
+	r = amdgpu_mes_event_log_init(adev);
+	if (r)
+		goto error_doorbell;
+
 	return 0;
 
+error_doorbell:
+	amdgpu_mes_doorbell_free(adev);
 error:
 	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
 	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
@@ -199,6 +225,10 @@ error_ids:
 
 void amdgpu_mes_fini(struct amdgpu_device *adev)
 {
+	amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
+			      &adev->mes.event_log_gpu_addr,
+			      &adev->mes.event_log_cpu_addr);
+
 	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
 	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
 	amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
@@ -886,6 +916,11 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
 	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
 	op_input.set_shader_debugger.process_context_addr = process_context_addr;
 	op_input.set_shader_debugger.flags.u32all = flags;
+
+	/* use amdgpu mes_flush_shader_debugger instead */
+	if (op_input.set_shader_debugger.flags.process_ctx_flush)
+		return -EINVAL;
+
 	op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
 	memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
 			sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
@@ -905,6 +940,32 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
 	return r;
 }
 
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+				     uint64_t process_context_addr)
+{
+	struct mes_misc_op_input op_input = {0};
+	int r;
+
+	if (!adev->mes.funcs->misc_op) {
+		DRM_ERROR("mes flush shader debugger is not supported!\n");
+		return -EINVAL;
+	}
+
+	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
+	op_input.set_shader_debugger.process_context_addr = process_context_addr;
+	op_input.set_shader_debugger.flags.process_ctx_flush = true;
+
+	amdgpu_mes_lock(&adev->mes);
+
+	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+	if (r)
+		DRM_ERROR("failed to set_shader_debugger\n");
+
+	amdgpu_mes_unlock(&adev->mes);
+
+	return r;
+}
+
 static void
 amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
 			       struct amdgpu_ring *ring,
@@ -1122,7 +1183,7 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
 
 	amdgpu_sync_create(&sync);
 
-	drm_exec_init(&exec, 0);
+	drm_exec_init(&exec, 0, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = drm_exec_lock_obj(&exec,
 				      &ctx_data->meta_data_obj->tbo.base);
@@ -1193,7 +1254,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
 	struct drm_exec exec;
 	long r;
 
-	drm_exec_init(&exec, 0);
+	drm_exec_init(&exec, 0, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = drm_exec_lock_obj(&exec,
 				      &ctx_data->meta_data_obj->tbo.base);
@@ -1479,3 +1540,34 @@ out:
 	amdgpu_ucode_release(&adev->mes.fw[pipe]);
 	return r;
 }
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
+{
+	struct amdgpu_device *adev = m->private;
+	uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
+
+	seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
+		     mem, PAGE_SIZE, false);
+
+	return 0;
+}
+
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
+
+#endif
+
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
+{
+
+#if defined(CONFIG_DEBUG_FS)
+	struct drm_minor *minor = adev_to_drm(adev)->primary;
+	struct dentry *root = minor->debugfs_root;
+
+	debugfs_create_file("amdgpu_mes_event_log", 0444, root,
+			    adev, &amdgpu_debugfs_mes_event_log_fops);
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index a27b424ffe00..7d4f93fea937 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -133,6 +133,11 @@ struct amdgpu_mes {
 	uint32_t			num_mes_dbs;
 	unsigned long			*doorbell_bitmap;
 
+	/* MES event log buffer */
+	struct amdgpu_bo		*event_log_gpu_obj;
+	uint64_t                        event_log_gpu_addr;
+	void				*event_log_cpu_addr;
+
 	/* ip specific functions */
 	const struct amdgpu_mes_funcs   *funcs;
 };
@@ -291,9 +296,10 @@ struct mes_misc_op_input {
 			uint64_t process_context_addr;
 			union {
 				struct {
-					uint64_t single_memop : 1;
-					uint64_t single_alu_op : 1;
-					uint64_t reserved: 30;
+					uint32_t single_memop : 1;
+					uint32_t single_alu_op : 1;
+					uint32_t reserved: 29;
+					uint32_t process_ctx_flush: 1;
 				};
 				uint32_t u32all;
 			} flags;
@@ -369,7 +375,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
 				const uint32_t *tcp_watch_cntl,
 				uint32_t flags,
 				bool trap_en);
-
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+				uint64_t process_context_addr);
 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
 			int queue_type, int idx,
 			struct amdgpu_mes_ctx_data *ctx_data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 32fe05c810c6..2e4911050cc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -32,7 +32,6 @@
 
 #include <drm/display/drm_dp_helper.h>
 #include <drm/drm_crtc.h>
-#include <drm/drm_edid.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_fixed.h>
 #include <drm/drm_framebuffer.h>
@@ -51,6 +50,7 @@ struct amdgpu_device;
 struct amdgpu_encoder;
 struct amdgpu_router;
 struct amdgpu_hpd;
+struct edid;
 
 #define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base)
 #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
@@ -343,6 +343,97 @@ struct amdgpu_mode_info {
 	int			disp_priority;
 	const struct amdgpu_display_funcs *funcs;
 	const enum drm_plane_type *plane_type;
+
+	/* Driver-private color mgmt props */
+
+	/* @plane_degamma_lut_property: Plane property to set a degamma LUT to
+	 * convert encoded values to light linear values before sampling or
+	 * blending.
+	 */
+	struct drm_property *plane_degamma_lut_property;
+	/* @plane_degamma_lut_size_property: Plane property to define the max
+	 * size of degamma LUT as supported by the driver (read-only).
+	 */
+	struct drm_property *plane_degamma_lut_size_property;
+	/**
+	 * @plane_degamma_tf_property: Plane pre-defined transfer function to
+	 * to go from scanout/encoded values to linear values.
+	 */
+	struct drm_property *plane_degamma_tf_property;
+	/**
+	 * @plane_hdr_mult_property:
+	 */
+	struct drm_property *plane_hdr_mult_property;
+
+	struct drm_property *plane_ctm_property;
+	/**
+	 * @shaper_lut_property: Plane property to set pre-blending shaper LUT
+	 * that converts color content before 3D LUT. If
+	 * plane_shaper_tf_property != Identity TF, AMD color module will
+	 * combine the user LUT values with pre-defined TF into the LUT
+	 * parameters to be programmed.
+	 */
+	struct drm_property *plane_shaper_lut_property;
+	/**
+	 * @shaper_lut_size_property: Plane property for the size of
+	 * pre-blending shaper LUT as supported by the driver (read-only).
+	 */
+	struct drm_property *plane_shaper_lut_size_property;
+	/**
+	 * @plane_shaper_tf_property: Plane property to set a predefined
+	 * transfer function for pre-blending shaper (before applying 3D LUT)
+	 * with or without LUT. There is no shaper ROM, but we can use AMD
+	 * color modules to program LUT parameters from predefined TF (or
+	 * from a combination of pre-defined TF and the custom 1D LUT).
+	 */
+	struct drm_property *plane_shaper_tf_property;
+	/**
+	 * @plane_lut3d_property: Plane property for color transformation using
+	 * a 3D LUT (pre-blending), a three-dimensional array where each
+	 * element is an RGB triplet. Each dimension has the size of
+	 * lut3d_size. The array contains samples from the approximated
+	 * function. On AMD, values between samples are estimated by
+	 * tetrahedral interpolation. The array is accessed with three indices,
+	 * one for each input dimension (color channel), blue being the
+	 * outermost dimension, red the innermost.
+	 */
+	struct drm_property *plane_lut3d_property;
+	/**
+	 * @plane_degamma_lut_size_property: Plane property to define the max
+	 * size of 3D LUT as supported by the driver (read-only). The max size
+	 * is the max size of one dimension and, therefore, the max number of
+	 * entries for 3D LUT array is the 3D LUT size cubed;
+	 */
+	struct drm_property *plane_lut3d_size_property;
+	/**
+	 * @plane_blend_lut_property: Plane property for output gamma before
+	 * blending. Userspace set a blend LUT to convert colors after 3D LUT
+	 * conversion. It works as a post-3DLUT 1D LUT. With shaper LUT, they
+	 * are sandwiching 3D LUT with two 1D LUT. If plane_blend_tf_property
+	 * != Identity TF, AMD color module will combine the user LUT values
+	 * with pre-defined TF into the LUT parameters to be programmed.
+	 */
+	struct drm_property *plane_blend_lut_property;
+	/**
+	 * @plane_blend_lut_size_property: Plane property to define the max
+	 * size of blend LUT as supported by the driver (read-only).
+	 */
+	struct drm_property *plane_blend_lut_size_property;
+	/**
+	 * @plane_blend_tf_property: Plane property to set a predefined
+	 * transfer function for pre-blending blend/out_gamma (after applying
+	 * 3D LUT) with or without LUT. There is no blend ROM, but we can use
+	 * AMD color modules to program LUT parameters from predefined TF (or
+	 * from a combination of pre-defined TF and the custom 1D LUT).
+	 */
+	struct drm_property *plane_blend_tf_property;
+	/* @regamma_tf_property: Transfer function for CRTC regamma
+	 * (post-blending). Possible values are defined by `enum
+	 * amdgpu_transfer_function`. There is no regamma ROM, but we can use
+	 * AMD color modules to program LUT parameters from predefined TF (or
+	 * from a combination of pre-defined TF and the custom 1D LUT).
+	 */
+	struct drm_property *regamma_tf_property;
 };
 
 #define AMDGPU_MAX_BL_LEVEL 0xFF
@@ -416,6 +507,10 @@ struct amdgpu_crtc {
 
 	int otg_inst;
 	struct drm_pending_vblank_event *event;
+
+	bool wb_pending;
+	bool wb_enabled;
+	struct drm_writeback_connector *wb_conn;
 };
 
 struct amdgpu_encoder_atom_dig {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index cef920a93924..425cebcc5cbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1245,19 +1245,15 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
  * amdgpu_bo_move_notify - notification about a memory move
  * @bo: pointer to a buffer object
  * @evict: if this move is evicting the buffer from the graphics address space
- * @new_mem: new information of the bufer object
  *
  * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
  * bookkeeping.
  * TTM driver callback which is called when ttm moves a buffer.
  */
-void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
-			   bool evict,
-			   struct ttm_resource *new_mem)
+void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_bo *abo;
-	struct ttm_resource *old_mem = bo->resource;
 
 	if (!amdgpu_bo_is_amdgpu_bo(bo))
 		return;
@@ -1274,13 +1270,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 	/* remember the eviction */
 	if (evict)
 		atomic64_inc(&adev->num_evictions);
-
-	/* update statistics */
-	if (!new_mem)
-		return;
-
-	/* move_notify is called before move happens */
-	trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
 }
 
 void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
@@ -1343,6 +1332,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
 
 	abo = ttm_to_amdgpu_bo(bo);
 
+	WARN_ON(abo->vm_bo);
+
 	if (abo->kfd_bo)
 		amdgpu_amdkfd_release_notify(abo);
 
@@ -1527,10 +1518,14 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
 u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	uint64_t offset;
+	uint64_t offset = AMDGPU_BO_INVALID_OFFSET;
+
+	if (bo->tbo.resource->mem_type == TTM_PL_TT)
+		offset = amdgpu_gmc_agp_addr(&bo->tbo);
 
-	offset = (bo->tbo.resource->start << PAGE_SHIFT) +
-		 amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type);
+	if (offset == AMDGPU_BO_INVALID_OFFSET)
+		offset = (bo->tbo.resource->start << PAGE_SHIFT) +
+			amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type);
 
 	return amdgpu_gmc_sign_extend(offset);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index d28e21baef16..a3ea8a82db23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -344,9 +344,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
 int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
 			   size_t buffer_size, uint32_t *metadata_size,
 			   uint64_t *flags);
-void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
-			   bool evict,
-			   struct ttm_resource *new_mem);
+void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict);
 void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
 vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a21045d018f2..2addbdf88394 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -466,7 +466,7 @@ static int psp_sw_init(void *handle)
 	}
 
 	ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
-				      amdgpu_sriov_vf(adev) ?
+				      (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ?
 				      AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
 				      &psp->fw_pri_bo,
 				      &psp->fw_pri_mc_addr,
@@ -1433,8 +1433,8 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
 			 get_extended_data) ||
 			amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
 				IP_VERSION(13, 0, 6);
-		bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps &
-						EXTEND_PEER_LINK_INFO_CMD_FLAG;
+		bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? 0 :
+				psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG;
 
 		/* popluate the shared output buffer rather than the cmd input buffer
 		 * with node_ids as the input for GET_PEER_LINKS command execution.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a3dc68e98910..fc42fb6ee191 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -28,6 +28,7 @@
 #include <linux/reboot.h>
 #include <linux/syscalls.h>
 #include <linux/pm_runtime.h>
+#include <linux/list_sort.h>
 
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
@@ -1155,8 +1156,10 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s
 		for_each_ras_error(err_node, err_data) {
 			err_info = &err_node->err_info;
 
-			amdgpu_ras_error_statistic_ce_count(&obj->err_data, &err_info->mcm_info, err_info->ce_count);
-			amdgpu_ras_error_statistic_ue_count(&obj->err_data, &err_info->mcm_info, err_info->ue_count);
+			amdgpu_ras_error_statistic_ce_count(&obj->err_data,
+					&err_info->mcm_info, NULL, err_info->ce_count);
+			amdgpu_ras_error_statistic_ue_count(&obj->err_data,
+					&err_info->mcm_info, NULL, err_info->ue_count);
 		}
 	} else {
 		/* for legacy asic path which doesn't has error source info */
@@ -1173,6 +1176,9 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
 	enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT;
 	struct amdgpu_ras_block_object *block_obj = NULL;
 
+	if (blk == AMDGPU_RAS_BLOCK_COUNT)
+		return -EINVAL;
+
 	if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY)
 		return -EINVAL;
 
@@ -2537,7 +2543,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 		return 0;
 
 	data = &con->eh_data;
-	*data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
+	*data = kzalloc(sizeof(**data), GFP_KERNEL);
 	if (!*data) {
 		ret = -ENOMEM;
 		goto out;
@@ -2824,10 +2830,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 	if (con)
 		return 0;
 
-	con = kmalloc(sizeof(struct amdgpu_ras) +
+	con = kzalloc(sizeof(*con) +
 			sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
 			sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
-			GFP_KERNEL|__GFP_ZERO);
+			GFP_KERNEL);
 	if (!con)
 		return -ENOMEM;
 
@@ -3132,6 +3138,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
+	amdgpu_ras_set_mca_debug_mode(adev, false);
+
 	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
 		if (!node->ras_obj) {
 			dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
@@ -3405,12 +3413,18 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 	return 0;
 }
 
-void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable)
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	int ret = 0;
 
-	if (con)
-		con->is_mca_debug_mode = enable;
+	if (con) {
+		ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
+		if (!ret)
+			con->is_mca_debug_mode = enable;
+	}
+
+	return ret;
 }
 
 bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev)
@@ -3665,8 +3679,24 @@ static struct ras_err_node *amdgpu_ras_error_node_new(void)
 	return err_node;
 }
 
+static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b)
+{
+	struct ras_err_node *nodea = container_of(a, struct ras_err_node, node);
+	struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node);
+	struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info;
+	struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info;
+
+	if (unlikely(infoa->socket_id != infob->socket_id))
+		return infoa->socket_id - infob->socket_id;
+	else
+		return infoa->die_id - infob->die_id;
+
+	return 0;
+}
+
 static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data,
-						      struct amdgpu_smuio_mcm_config_info *mcm_info)
+				struct amdgpu_smuio_mcm_config_info *mcm_info,
+				struct ras_err_addr *err_addr)
 {
 	struct ras_err_node *err_node;
 
@@ -3680,14 +3710,19 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d
 
 	memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info));
 
+	if (err_addr)
+		memcpy(&err_node->err_info.err_addr, err_addr, sizeof(*err_addr));
+
 	err_data->err_list_count++;
 	list_add_tail(&err_node->node, &err_data->err_node_list);
+	list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp);
 
 	return &err_node->err_info;
 }
 
 int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
-					struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count)
+		struct amdgpu_smuio_mcm_config_info *mcm_info,
+		struct ras_err_addr *err_addr, u64 count)
 {
 	struct ras_err_info *err_info;
 
@@ -3697,7 +3732,7 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
 	if (!count)
 		return 0;
 
-	err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
+	err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr);
 	if (!err_info)
 		return -EINVAL;
 
@@ -3708,7 +3743,8 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
 }
 
 int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
-					struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count)
+		struct amdgpu_smuio_mcm_config_info *mcm_info,
+		struct ras_err_addr *err_addr, u64 count)
 {
 	struct ras_err_info *err_info;
 
@@ -3718,7 +3754,7 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
 	if (!count)
 		return 0;
 
-	err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
+	err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr);
 	if (!err_info)
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 19161916ac46..76fb85628716 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -452,10 +452,17 @@ struct ras_fs_data {
 	char debugfs_name[32];
 };
 
+struct ras_err_addr {
+	uint64_t err_status;
+	uint64_t err_ipid;
+	uint64_t err_addr;
+};
+
 struct ras_err_info {
 	struct amdgpu_smuio_mcm_config_info mcm_info;
 	u64 ce_count;
 	u64 ue_count;
+	struct ras_err_addr err_addr;
 };
 
 struct ras_err_node {
@@ -773,7 +780,7 @@ struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev);
 
 int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con);
 
-void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable);
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable);
 bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev);
 bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
 				     unsigned int *mode);
@@ -806,8 +813,10 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
 int amdgpu_ras_error_data_init(struct ras_err_data *err_data);
 void amdgpu_ras_error_data_fini(struct ras_err_data *err_data);
 int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
-					struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count);
+		struct amdgpu_smuio_mcm_config_info *mcm_info,
+		struct ras_err_addr *err_addr, u64 count);
 int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
-					struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count);
+		struct amdgpu_smuio_mcm_config_info *mcm_info,
+		struct ras_err_addr *err_addr, u64 count);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 65aa218380be..2fde93b00cab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -214,6 +214,12 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
 			control->i2c_address = EEPROM_I2C_MADDR_0;
 		return true;
 	case IP_VERSION(13, 0, 0):
+		if (strnstr(atom_ctx->vbios_pn, "D707",
+			    sizeof(atom_ctx->vbios_pn)))
+			control->i2c_address = EEPROM_I2C_MADDR_0;
+		else
+			control->i2c_address = EEPROM_I2C_MADDR_4;
+		return true;
 	case IP_VERSION(13, 0, 6):
 	case IP_VERSION(13, 0, 10):
 		control->i2c_address = EEPROM_I2C_MADDR_4;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 6a80d3ec887e..45424ebf9681 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -642,6 +642,10 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
 				    struct amdgpu_mqd_prop *prop)
 {
 	struct amdgpu_device *adev = ring->adev;
+	bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
+				    amdgpu_gfx_is_high_priority_compute_queue(adev, ring);
+	bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
+				amdgpu_gfx_is_high_priority_graphics_queue(adev, ring);
 
 	memset(prop, 0, sizeof(*prop));
 
@@ -659,10 +663,8 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
 	 */
 	prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
 
-	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
-	     amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) ||
-	    (ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
-	     amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
+	prop->allow_tunneling = is_high_prio_compute;
+	if (is_high_prio_compute || is_high_prio_gfx) {
 		prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
 		prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index 35e0ae9acadc..2c3675d91614 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -531,13 +531,12 @@ int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
 	if (version_major == 2 && version_minor == 1)
 		adev->gfx.rlc.is_rlc_v2_1 = true;
 
-	if (version_minor >= 0) {
-		err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
-		if (err) {
-			dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
-			return err;
-		}
+	err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
+	if (err) {
+		dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
+		return err;
 	}
+
 	if (version_minor >= 1)
 		amdgpu_gfx_rlc_init_microcode_v2_1(adev);
 	if (version_minor >= 2)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
new file mode 100644
index 000000000000..7a6a67275404
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_seq64.h"
+
+#include <drm/drm_exec.h>
+
+/**
+ * DOC: amdgpu_seq64
+ *
+ * amdgpu_seq64 allocates a 64bit memory on each request in sequence order.
+ * seq64 driver is required for user queue fence memory allocation, TLB
+ * counters and VM updates. It has maximum count of 32768 64 bit slots.
+ */
+
+/**
+ * amdgpu_seq64_map - Map the seq64 memory to VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: vm pointer
+ * @bo_va: bo_va pointer
+ * @seq64_addr: seq64 vaddr start address
+ * @size: seq64 pool size
+ *
+ * Map the seq64 memory to the given VM.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		     struct amdgpu_bo_va **bo_va, u64 seq64_addr,
+		     uint32_t size)
+{
+	struct amdgpu_bo *bo;
+	struct drm_exec exec;
+	int r;
+
+	bo = adev->seq64.sbo;
+	if (!bo)
+		return -EINVAL;
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+	drm_exec_until_all_locked(&exec) {
+		r = amdgpu_vm_lock_pd(vm, &exec, 0);
+		if (likely(!r))
+			r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+		drm_exec_retry_on_contention(&exec);
+		if (unlikely(r))
+			goto error;
+	}
+
+	*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+	if (!*bo_va) {
+		r = -ENOMEM;
+		goto error;
+	}
+
+	r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, size,
+			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
+			     AMDGPU_PTE_EXECUTABLE);
+	if (r) {
+		DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
+		amdgpu_vm_bo_del(adev, *bo_va);
+		goto error;
+	}
+
+	r = amdgpu_vm_bo_update(adev, *bo_va, false);
+	if (r) {
+		DRM_ERROR("failed to do vm_bo_update on userq sem\n");
+		amdgpu_vm_bo_del(adev, *bo_va);
+		goto error;
+	}
+
+error:
+	drm_exec_fini(&exec);
+	return r;
+}
+
+/**
+ * amdgpu_seq64_unmap - Unmap the seq64 memory
+ *
+ * @adev: amdgpu_device pointer
+ * @fpriv: DRM file private
+ *
+ * Unmap the seq64 memory from the given VM.
+ */
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv)
+{
+	struct amdgpu_vm *vm;
+	struct amdgpu_bo *bo;
+	struct drm_exec exec;
+	int r;
+
+	if (!fpriv->seq64_va)
+		return;
+
+	bo = adev->seq64.sbo;
+	if (!bo)
+		return;
+
+	vm = &fpriv->vm;
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+	drm_exec_until_all_locked(&exec) {
+		r = amdgpu_vm_lock_pd(vm, &exec, 0);
+		if (likely(!r))
+			r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+		drm_exec_retry_on_contention(&exec);
+		if (unlikely(r))
+			goto error;
+	}
+
+	amdgpu_vm_bo_del(adev, fpriv->seq64_va);
+
+	fpriv->seq64_va = NULL;
+
+error:
+	drm_exec_fini(&exec);
+}
+
+/**
+ * amdgpu_seq64_alloc - Allocate a 64 bit memory
+ *
+ * @adev: amdgpu_device pointer
+ * @gpu_addr: allocated gpu VA start address
+ * @cpu_addr: allocated cpu VA start address
+ *
+ * Alloc a 64 bit memory from seq64 pool.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr,
+		       u64 **cpu_addr)
+{
+	unsigned long bit_pos;
+	u32 offset;
+
+	bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem);
+
+	if (bit_pos < adev->seq64.num_sem) {
+		__set_bit(bit_pos, adev->seq64.used);
+		offset = bit_pos << 6; /* convert to qw offset */
+	} else {
+		return -EINVAL;
+	}
+
+	*gpu_addr = offset + AMDGPU_SEQ64_VADDR_START;
+	*cpu_addr = offset + adev->seq64.cpu_base_addr;
+
+	return 0;
+}
+
+/**
+ * amdgpu_seq64_free - Free the given 64 bit memory
+ *
+ * @adev: amdgpu_device pointer
+ * @gpu_addr: gpu start address to be freed
+ *
+ * Free the given 64 bit memory from seq64 pool.
+ *
+ */
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr)
+{
+	u32 offset;
+
+	offset = gpu_addr - AMDGPU_SEQ64_VADDR_START;
+
+	offset >>= 6;
+	if (offset < adev->seq64.num_sem)
+		__clear_bit(offset, adev->seq64.used);
+}
+
+/**
+ * amdgpu_seq64_fini - Cleanup seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free the memory space allocated for seq64.
+ *
+ */
+void amdgpu_seq64_fini(struct amdgpu_device *adev)
+{
+	amdgpu_bo_free_kernel(&adev->seq64.sbo,
+			      NULL,
+			      (void **)&adev->seq64.cpu_base_addr);
+}
+
+/**
+ * amdgpu_seq64_init - Initialize seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate the required memory space for seq64.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_init(struct amdgpu_device *adev)
+{
+	int r;
+
+	if (adev->seq64.sbo)
+		return 0;
+
+	/*
+	 * AMDGPU_MAX_SEQ64_SLOTS * sizeof(u64) * 8 = AMDGPU_MAX_SEQ64_SLOTS
+	 * 64bit slots
+	 */
+	r = amdgpu_bo_create_kernel(adev, AMDGPU_SEQ64_SIZE,
+				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+				    &adev->seq64.sbo, NULL,
+				    (void **)&adev->seq64.cpu_base_addr);
+	if (r) {
+		dev_warn(adev->dev, "(%d) create seq64 failed\n", r);
+		return r;
+	}
+
+	memset(adev->seq64.cpu_base_addr, 0, AMDGPU_SEQ64_SIZE);
+
+	adev->seq64.num_sem = AMDGPU_MAX_SEQ64_SLOTS;
+	memset(&adev->seq64.used, 0, sizeof(adev->seq64.used));
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
new file mode 100644
index 000000000000..2196e72be508
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SEQ64_H__
+#define __AMDGPU_SEQ64_H__
+
+#define AMDGPU_SEQ64_SIZE		(2ULL << 20)
+#define AMDGPU_MAX_SEQ64_SLOTS		(AMDGPU_SEQ64_SIZE / (sizeof(u64) * 8))
+#define AMDGPU_SEQ64_VADDR_OFFSET	0x50000
+#define AMDGPU_SEQ64_VADDR_START	(AMDGPU_VA_RESERVED_SIZE + AMDGPU_SEQ64_VADDR_OFFSET)
+
+struct amdgpu_seq64 {
+	struct amdgpu_bo *sbo;
+	u32 num_sem;
+	u64 *cpu_base_addr;
+	DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS);
+};
+
+void amdgpu_seq64_fini(struct amdgpu_device *adev);
+int amdgpu_seq64_init(struct amdgpu_device *adev);
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr);
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr);
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		     struct amdgpu_bo_va **bo_va, u64 seq64_addr, uint32_t size);
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv);
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index dcd8c066bc1f..1b013a44ca99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -191,7 +191,8 @@ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
 
 	/* Never sync to VM updates either. */
 	if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
-	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+	    owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
+	    owner != AMDGPU_FENCE_OWNER_KFD)
 		return false;
 
 	/* Ignore fences depending on the sync mode */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 2fd1bfb35916..f539b1d00234 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -554,6 +554,21 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
 		      __entry->value)
 );
 
+TRACE_EVENT(amdgpu_runpm_reference_dumps,
+	    TP_PROTO(uint32_t index, const char *func),
+	    TP_ARGS(index, func),
+	    TP_STRUCT__entry(
+			     __field(uint32_t, index)
+			     __string(func, func)
+			     ),
+	    TP_fast_assign(
+			   __entry->index = index;
+			   __assign_str(func, func);
+			   ),
+	    TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n",
+		      __entry->index,
+		      __get_str(func))
+);
 #undef AMDGPU_JOB_GET_TIMELINE_NAME
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 05991c5c8ddb..75c9fd2c6c2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -545,10 +545,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 			return r;
 	}
 
+	trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
 out:
 	/* update statistics */
 	atomic64_add(bo->base.size, &adev->num_bytes_moved);
-	amdgpu_bo_move_notify(bo, evict, new_mem);
+	amdgpu_bo_move_notify(bo, evict);
 	return 0;
 }
 
@@ -959,10 +960,8 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 		return 0;
 
 	addr = amdgpu_gmc_agp_addr(bo);
-	if (addr != AMDGPU_BO_INVALID_OFFSET) {
-		bo->resource->start = addr >> PAGE_SHIFT;
+	if (addr != AMDGPU_BO_INVALID_OFFSET)
 		return 0;
-	}
 
 	/* allocate GART space */
 	placement.num_placement = 1;
@@ -1555,7 +1554,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 static void
 amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
 {
-	amdgpu_bo_move_notify(bo, false, NULL);
+	amdgpu_bo_move_notify(bo, false);
 }
 
 static struct ttm_device_funcs amdgpu_bo_driver = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index b14127429f30..d334e42fe0eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1062,7 +1062,8 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
 {
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
 		amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
-			amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+			(amdgpu_sriov_vf(adev) || fw_bo_location == 1) ?
+			AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
 			&adev->firmware.fw_buf,
 			&adev->firmware.fw_buf_mc,
 			&adev->firmware.fw_buf_ptr);
@@ -1397,9 +1398,13 @@ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
 
 	if (err)
 		return -ENODEV;
+
 	err = amdgpu_ucode_validate(*fw);
-	if (err)
+	if (err) {
 		dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name);
+		release_firmware(*fw);
+		*fw = NULL;
+	}
 
 	return err;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
index ca45ba8ac171..bfbf59326ee1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -86,7 +86,7 @@ static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	amdgpu_sync_create(&sync);
 
-	drm_exec_init(&exec, 0);
+	drm_exec_init(&exec, 0, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = drm_exec_lock_obj(&exec, &bo->tbo.base);
 		drm_exec_retry_on_contention(&exec);
@@ -149,7 +149,7 @@ static int unmap_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	struct drm_exec exec;
 	long r;
 
-	drm_exec_init(&exec, 0);
+	drm_exec_init(&exec, 0, 0);
 	drm_exec_until_all_locked(&exec) {
 		r = drm_exec_lock_obj(&exec, &bo->tbo.base);
 		drm_exec_retry_on_contention(&exec);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3a632c3b1a2c..0dcff2889e25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1099,7 +1099,8 @@ bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev)
 {
 	bool xnack_mode = true;
 
-	if (amdgpu_sriov_vf(adev) && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+	if (amdgpu_sriov_vf(adev) &&
+	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
 		xnack_mode = false;
 
 	return xnack_mode;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index db6fc0cb18eb..453a4b786cfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0+
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_edid.h>
 #include <drm/drm_simple_kms_helper.h>
 #include <drm/drm_vblank.h>
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d1b8afd105c9..b8fcb6c55698 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -285,6 +285,7 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
 	list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
 		struct amdgpu_bo *bo = vm_bo->bo;
 
+		vm_bo->moved = true;
 		if (!bo || bo->tbo.type != ttm_bo_type_kernel)
 			list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
 		else if (bo->parent)
@@ -1438,6 +1439,51 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 }
 
 /**
+ * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @flush_type: flush type
+ * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush.
+ *
+ * Flush TLB if needed for a compute VM.
+ *
+ * Returns:
+ * 0 for success.
+ */
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm,
+				uint32_t flush_type,
+				uint32_t xcc_mask)
+{
+	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
+	bool all_hub = false;
+	int xcc = 0, r = 0;
+
+	WARN_ON_ONCE(!vm->is_compute_context);
+
+	/*
+	 * It can be that we race and lose here, but that is extremely unlikely
+	 * and the worst thing which could happen is that we flush the changes
+	 * into the TLB once more which is harmless.
+	 */
+	if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
+		return 0;
+
+	if (adev->family == AMDGPU_FAMILY_AI ||
+	    adev->family == AMDGPU_FAMILY_RV)
+		all_hub = true;
+
+	for_each_inst(xcc, xcc_mask) {
+		r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type,
+						   all_hub, xcc);
+		if (r)
+			break;
+	}
+	return r;
+}
+
+/**
  * amdgpu_vm_bo_add - add a bo to a specific vm
  *
  * @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 2cd86d2bf73f..b6cd565562ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -324,6 +324,7 @@ struct amdgpu_vm {
 	/* Last finished delayed update */
 	atomic64_t		tlb_seq;
 	struct dma_fence	*last_tlb_flush;
+	atomic64_t		kfd_last_flushed_seq;
 
 	/* How many times we had to re-generate the page tables */
 	uint64_t		generation;
@@ -445,6 +446,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 			   struct amdgpu_vm *vm,
 			   struct ww_acquire_ctx *ticket);
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm,
+				uint32_t flush_type,
+				uint32_t xcc_mask);
 void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 			    struct amdgpu_vm *vm, struct amdgpu_bo *bo);
 int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index a2287bb25223..a160265ddc07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -642,13 +642,14 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
 
 	if (!entry->bo)
 		return;
+
+	entry->bo->vm_bo = NULL;
 	shadow = amdgpu_bo_shadowed(entry->bo);
 	if (shadow) {
 		ttm_bo_set_bulk_move(&shadow->tbo, NULL);
 		amdgpu_bo_unref(&shadow);
 	}
 	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
-	entry->bo->vm_bo = NULL;
 
 	spin_lock(&entry->vm->status_lock);
 	list_del(&entry->vm_status);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index e81579708e96..6f149b54d4d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -26,6 +26,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_vpe.h"
+#include "amdgpu_smu.h"
 #include "soc15_common.h"
 #include "vpe_v6_1.h"
 
@@ -33,8 +34,186 @@
 /* VPE CSA resides in the 4th page of CSA */
 #define AMDGPU_CSA_VPE_OFFSET 	(4096 * 3)
 
+/* 1 second timeout */
+#define VPE_IDLE_TIMEOUT	msecs_to_jiffies(1000)
+
+#define VPE_MAX_DPM_LEVEL			4
+#define FIXED1_8_BITS_PER_FRACTIONAL_PART	8
+#define GET_PRATIO_INTEGER_PART(x)		((x) >> FIXED1_8_BITS_PER_FRACTIONAL_PART)
+
 static void vpe_set_ring_funcs(struct amdgpu_device *adev);
 
+static inline uint16_t div16_u16_rem(uint16_t dividend, uint16_t divisor, uint16_t *remainder)
+{
+	*remainder = dividend % divisor;
+	return dividend / divisor;
+}
+
+static inline uint16_t complete_integer_division_u16(
+	uint16_t dividend,
+	uint16_t divisor,
+	uint16_t *remainder)
+{
+	return div16_u16_rem(dividend, divisor, (uint16_t *)remainder);
+}
+
+static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator)
+{
+	bool arg1_negative = numerator < 0;
+	bool arg2_negative = denominator < 0;
+
+	uint16_t arg1_value = (uint16_t)(arg1_negative ? -numerator : numerator);
+	uint16_t arg2_value = (uint16_t)(arg2_negative ? -denominator : denominator);
+
+	uint16_t remainder;
+
+	/* determine integer part */
+	uint16_t res_value = complete_integer_division_u16(
+		arg1_value, arg2_value, &remainder);
+
+	if (res_value > 127 /* CHAR_MAX */)
+		return 0;
+
+	/* determine fractional part */
+	{
+		unsigned int i = FIXED1_8_BITS_PER_FRACTIONAL_PART;
+
+		do {
+			remainder <<= 1;
+
+			res_value <<= 1;
+
+			if (remainder >= arg2_value) {
+				res_value |= 1;
+				remainder -= arg2_value;
+			}
+		} while (--i != 0);
+	}
+
+	/* round up LSB */
+	{
+		uint16_t summand = (remainder << 1) >= arg2_value;
+
+		if ((res_value + summand) > 32767 /* SHRT_MAX */)
+			return 0;
+
+		res_value += summand;
+	}
+
+	if (arg1_negative ^ arg2_negative)
+		res_value = -res_value;
+
+	return res_value;
+}
+
+static uint16_t vpe_internal_get_pratio(uint16_t from_frequency, uint16_t to_frequency)
+{
+	uint16_t pratio = vpe_u1_8_from_fraction(from_frequency, to_frequency);
+
+	if (GET_PRATIO_INTEGER_PART(pratio) > 1)
+		pratio = 0;
+
+	return pratio;
+}
+
+/*
+ * VPE has 4 DPM levels from level 0 (lowerest) to 3 (highest),
+ * VPE FW will dynamically decide which level should be used according to current loading.
+ *
+ * Get VPE and SOC clocks from PM, and select the appropriate four clock values,
+ * calculate the ratios of adjusting from one clock to another.
+ * The VPE FW can then request the appropriate frequency from the PMFW.
+ */
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = vpe->ring.adev;
+	uint32_t dpm_ctl;
+
+	if (adev->pm.dpm_enabled) {
+		struct dpm_clocks clock_table = { 0 };
+		struct dpm_clock *VPEClks;
+		struct dpm_clock *SOCClks;
+		uint32_t idx;
+		uint32_t pratio_vmax_vnorm = 0, pratio_vnorm_vmid = 0, pratio_vmid_vmin = 0;
+		uint16_t pratio_vmin_freq = 0, pratio_vmid_freq = 0, pratio_vnorm_freq = 0, pratio_vmax_freq = 0;
+
+		dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+		dpm_ctl |= 1; /* DPM enablement */
+		WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+
+		/* Get VPECLK and SOCCLK */
+		if (amdgpu_dpm_get_dpm_clock_table(adev, &clock_table)) {
+			dev_dbg(adev->dev, "%s: get clock failed!\n", __func__);
+			goto disable_dpm;
+		}
+
+		SOCClks = clock_table.SocClocks;
+		VPEClks = clock_table.VPEClocks;
+
+		/* vpe dpm only cares 4 levels. */
+		for (idx = 0; idx < VPE_MAX_DPM_LEVEL; idx++) {
+			uint32_t soc_dpm_level;
+			uint32_t min_freq;
+
+			if (idx == 0)
+				soc_dpm_level = 0;
+			else
+				soc_dpm_level = (idx * 2) + 1;
+
+			/* clamp the max level */
+			if (soc_dpm_level > PP_SMU_NUM_VPECLK_DPM_LEVELS - 1)
+				soc_dpm_level = PP_SMU_NUM_VPECLK_DPM_LEVELS - 1;
+
+			min_freq = (SOCClks[soc_dpm_level].Freq < VPEClks[soc_dpm_level].Freq) ?
+				   SOCClks[soc_dpm_level].Freq : VPEClks[soc_dpm_level].Freq;
+
+			switch (idx) {
+			case 0:
+				pratio_vmin_freq = min_freq;
+				break;
+			case 1:
+				pratio_vmid_freq = min_freq;
+				break;
+			case 2:
+				pratio_vnorm_freq = min_freq;
+				break;
+			case 3:
+				pratio_vmax_freq = min_freq;
+				break;
+			default:
+				break;
+			}
+		}
+
+		if (pratio_vmin_freq && pratio_vmid_freq && pratio_vnorm_freq && pratio_vmax_freq) {
+			uint32_t pratio_ctl;
+
+			pratio_vmax_vnorm = (uint32_t)vpe_internal_get_pratio(pratio_vmax_freq, pratio_vnorm_freq);
+			pratio_vnorm_vmid = (uint32_t)vpe_internal_get_pratio(pratio_vnorm_freq, pratio_vmid_freq);
+			pratio_vmid_vmin = (uint32_t)vpe_internal_get_pratio(pratio_vmid_freq, pratio_vmin_freq);
+
+			pratio_ctl = pratio_vmax_vnorm | (pratio_vnorm_vmid << 9) | (pratio_vmid_vmin << 18);
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_pratio), pratio_ctl);		/* PRatio */
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_interval), 24000);	/* 1ms, unit=1/24MHz */
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_decision_threshold), 1200000);	/* 50ms */
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_busy_clamp_threshold), 1200000);/* 50ms */
+			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_idle_clamp_threshold), 1200000);/* 50ms */
+			dev_dbg(adev->dev, "%s: configure vpe dpm pratio done!\n", __func__);
+		} else {
+			dev_dbg(adev->dev, "%s: invalid pratio parameters!\n", __func__);
+			goto disable_dpm;
+		}
+	}
+	return 0;
+
+disable_dpm:
+	dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+	dpm_ctl &= 0xfffffffe; /* Disable DPM */
+	WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+	dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__);
+	return 0;
+}
+
 int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev)
 {
 	struct amdgpu_firmware_info ucode = {
@@ -134,6 +313,19 @@ static int vpe_early_init(void *handle)
 	return 0;
 }
 
+static void vpe_idle_work_handler(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, vpe.idle_work.work);
+	unsigned int fences = 0;
+
+	fences += amdgpu_fence_count_emitted(&adev->vpe.ring);
+
+	if (fences == 0)
+		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+	else
+		schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+}
 
 static int vpe_common_init(struct amdgpu_vpe *vpe)
 {
@@ -150,6 +342,9 @@ static int vpe_common_init(struct amdgpu_vpe *vpe)
 		return r;
 	}
 
+	vpe->context_started = false;
+	INIT_DELAYED_WORK(&adev->vpe.idle_work, vpe_idle_work_handler);
+
 	return 0;
 }
 
@@ -219,6 +414,9 @@ static int vpe_hw_fini(void *handle)
 
 	vpe_ring_stop(vpe);
 
+	/* Power off VPE */
+	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+
 	return 0;
 }
 
@@ -226,6 +424,8 @@ static int vpe_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	cancel_delayed_work_sync(&adev->vpe.idle_work);
+
 	return vpe_hw_fini(adev);
 }
 
@@ -430,6 +630,21 @@ static int vpe_set_clockgating_state(void *handle,
 static int vpe_set_powergating_state(void *handle,
 				     enum amd_powergating_state state)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+
+	if (!adev->pm.dpm_enabled)
+		dev_err(adev->dev, "Without PM, cannot support powergating\n");
+
+	dev_dbg(adev->dev, "%s: %s!\n", __func__, (state == AMD_PG_STATE_GATE) ? "GATE":"UNGATE");
+
+	if (state == AMD_PG_STATE_GATE) {
+		amdgpu_dpm_enable_vpe(adev, false);
+		vpe->context_started = false;
+	} else {
+		amdgpu_dpm_enable_vpe(adev, true);
+	}
+
 	return 0;
 }
 
@@ -595,6 +810,38 @@ err0:
 	return ret;
 }
 
+static void vpe_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+
+	cancel_delayed_work_sync(&adev->vpe.idle_work);
+
+	/* Power on VPE and notify VPE of new context  */
+	if (!vpe->context_started) {
+		uint32_t context_notify;
+
+		/* Power on VPE */
+		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_UNGATE);
+
+		/* Indicates that a job from a new context has been submitted. */
+		context_notify = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator));
+		if ((context_notify & 0x1) == 0)
+			context_notify |= 0x1;
+		else
+			context_notify &= ~(0x1);
+		WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator), context_notify);
+		vpe->context_started = true;
+	}
+}
+
+static void vpe_ring_end_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+}
+
 static const struct amdgpu_ring_funcs vpe_ring_funcs = {
 	.type = AMDGPU_RING_TYPE_VPE,
 	.align_mask = 0xf,
@@ -625,6 +872,8 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = {
 	.init_cond_exec = vpe_ring_init_cond_exec,
 	.patch_cond_exec = vpe_ring_patch_cond_exec,
 	.preempt_ib = vpe_ring_preempt_ib,
+	.begin_use = vpe_ring_begin_use,
+	.end_use = vpe_ring_end_use,
 };
 
 static void vpe_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
index 29d56f7ae4a9..1153ddaea64d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
@@ -47,6 +47,15 @@ struct vpe_regs {
 	uint32_t queue0_rb_wptr_lo;
 	uint32_t queue0_rb_wptr_hi;
 	uint32_t queue0_preempt;
+
+	uint32_t dpm_enable;
+	uint32_t dpm_pratio;
+	uint32_t dpm_request_interval;
+	uint32_t dpm_decision_threshold;
+	uint32_t dpm_busy_clamp_threshold;
+	uint32_t dpm_idle_clamp_threshold;
+	uint32_t dpm_request_lv;
+	uint32_t context_indicator;
 };
 
 struct amdgpu_vpe {
@@ -63,12 +72,15 @@ struct amdgpu_vpe {
 	struct amdgpu_bo		*cmdbuf_obj;
 	uint64_t			cmdbuf_gpu_addr;
 	uint32_t			*cmdbuf_cpu_addr;
+	struct delayed_work		idle_work;
+	bool				context_started;
 };
 
 int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev);
 int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe);
 int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe);
 int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe);
 
 #define vpe_ring_init(vpe) ((vpe)->funcs->ring_init ? (vpe)->funcs->ring_init((vpe)) : 0)
 #define vpe_ring_start(vpe) ((vpe)->funcs->ring_start ? (vpe)->funcs->ring_start((vpe)) : 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index bd20cb3b9819..a6c88f2fe6e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -413,6 +413,38 @@ static ssize_t amdgpu_xgmi_show_num_links(struct device *dev,
 	return sysfs_emit(buf, "%s\n", buf);
 }
 
+static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+	int i, j, size = 0;
+	int current_node;
+	/*
+	 * get the node id in the sysfs for the current socket and show
+	 * it in the port num info output in the sysfs for easy reading.
+	 * it is NOT the one retrieved from xgmi ta.
+	 */
+	for (i = 0; i < top->num_nodes; i++) {
+		if (top->nodes[i].node_id == adev->gmc.xgmi.node_id) {
+			current_node = i;
+			break;
+		}
+	}
+
+	for (i = 0; i < top->num_nodes; i++) {
+		for (j = 0; j < top->nodes[i].num_links; j++)
+			/* node id in sysfs starts from 1 rather than 0 so +1 here */
+			size += sysfs_emit_at(buf, size, "%02x:%02x ->  %02x:%02x\n", current_node + 1,
+					      top->nodes[i].port_num[j].src_xgmi_port_num, i + 1,
+					      top->nodes[i].port_num[j].dst_xgmi_port_num);
+	}
+
+	return size;
+}
+
 #define AMDGPU_XGMI_SET_FICAA(o)	((o) | 0x456801)
 static ssize_t amdgpu_xgmi_show_error(struct device *dev,
 				      struct device_attribute *attr,
@@ -452,6 +484,7 @@ static DEVICE_ATTR(xgmi_physical_id, 0444, amdgpu_xgmi_show_physical_id, NULL);
 static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);
 static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL);
 static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL);
+static DEVICE_ATTR(xgmi_port_num, S_IRUGO, amdgpu_xgmi_show_connected_port_num, NULL);
 
 static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
 					 struct amdgpu_hive_info *hive)
@@ -487,6 +520,13 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
 	if (ret)
 		pr_err("failed to create xgmi_num_links\n");
 
+	/* Create xgmi port num file if supported */
+	if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+		ret = device_create_file(adev->dev, &dev_attr_xgmi_port_num);
+		if (ret)
+			dev_err(adev->dev, "failed to create xgmi_port_num\n");
+	}
+
 	/* Create sysfs link to hive info folder on the first device */
 	if (hive->kobj.parent != (&adev->dev->kobj)) {
 		ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj,
@@ -517,6 +557,8 @@ remove_file:
 	device_remove_file(adev->dev, &dev_attr_xgmi_error);
 	device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
 	device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+	if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+		device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
 
 success:
 	return ret;
@@ -533,6 +575,8 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
 	device_remove_file(adev->dev, &dev_attr_xgmi_error);
 	device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
 	device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+	if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+		device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
 
 	if (hive->kobj.parent != (&adev->dev->kobj))
 		sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info");
@@ -779,6 +823,28 @@ static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_inf
 	return 0;
 }
 
+static void amdgpu_xgmi_fill_topology_info(struct amdgpu_device *adev,
+	struct amdgpu_device *peer_adev)
+{
+	struct psp_xgmi_topology_info *top_info = &adev->psp.xgmi_context.top_info;
+	struct psp_xgmi_topology_info *peer_info = &peer_adev->psp.xgmi_context.top_info;
+
+	for (int i = 0; i < peer_info->num_nodes; i++) {
+		if (peer_info->nodes[i].node_id == adev->gmc.xgmi.node_id) {
+			for (int j = 0; j < top_info->num_nodes; j++) {
+				if (top_info->nodes[j].node_id == peer_adev->gmc.xgmi.node_id) {
+					peer_info->nodes[i].num_hops = top_info->nodes[j].num_hops;
+					peer_info->nodes[i].is_sharing_enabled =
+							top_info->nodes[j].is_sharing_enabled;
+					peer_info->nodes[i].num_links =
+							top_info->nodes[j].num_links;
+					return;
+				}
+			}
+		}
+	}
+}
+
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 {
 	struct psp_xgmi_topology_info *top_info;
@@ -853,18 +919,38 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 				goto exit_unlock;
 		}
 
-		/* get latest topology info for each device from psp */
-		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
-			ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
-					&tmp_adev->psp.xgmi_context.top_info, false);
+		if (amdgpu_sriov_vf(adev) &&
+			adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+			/* only get topology for VF being init if it can support full duplex */
+			ret = psp_xgmi_get_topology_info(&adev->psp, count,
+						&adev->psp.xgmi_context.top_info, false);
 			if (ret) {
-				dev_err(tmp_adev->dev,
+				dev_err(adev->dev,
 					"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
-					tmp_adev->gmc.xgmi.node_id,
-					tmp_adev->gmc.xgmi.hive_id, ret);
-				/* To do : continue with some node failed or disable the whole hive */
+					adev->gmc.xgmi.node_id,
+					adev->gmc.xgmi.hive_id, ret);
+				/* To do: continue with some node failed or disable the whole hive*/
 				goto exit_unlock;
 			}
+
+			/* fill the topology info for peers instead of getting from PSP */
+			list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+				amdgpu_xgmi_fill_topology_info(adev, tmp_adev);
+			}
+		} else {
+			/* get latest topology info for each device from psp */
+			list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+				ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+					&tmp_adev->psp.xgmi_context.top_info, false);
+				if (ret) {
+					dev_err(tmp_adev->dev,
+						"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+						tmp_adev->gmc.xgmi.node_id,
+						tmp_adev->gmc.xgmi.hive_id, ret);
+					/* To do : continue with some node failed or disable the whole hive */
+					goto exit_unlock;
+				}
+			}
 		}
 
 		/* get topology again for hives that support extended data */
@@ -1227,10 +1313,10 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a
 
 	switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) {
 	case AMDGPU_MCA_ERROR_TYPE_UE:
-		amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL);
+		amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, NULL, 1ULL);
 		break;
 	case AMDGPU_MCA_ERROR_TYPE_CE:
-		amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL);
+		amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, NULL, 1ULL);
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index 3f715e7fe1a9..d6f808acfb17 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -24,6 +24,7 @@
 #include "soc15.h"
 
 #include "soc15_common.h"
+#include "amdgpu_reg_state.h"
 #include "amdgpu_xcp.h"
 #include "gfx_v9_4_3.h"
 #include "gfxhub_v1_2.h"
@@ -656,3 +657,416 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
 
 	return 0;
 }
+
+static void aqua_read_smn(struct amdgpu_device *adev,
+			  struct amdgpu_smn_reg_data *regdata,
+			  uint64_t smn_addr)
+{
+	regdata->addr = smn_addr;
+	regdata->value = RREG32_PCIE(smn_addr);
+}
+
+struct aqua_reg_list {
+	uint64_t start_addr;
+	uint32_t num_regs;
+	uint32_t incrx;
+};
+
+#define DW_ADDR_INCR	4
+
+static void aqua_read_smn_ext(struct amdgpu_device *adev,
+			      struct amdgpu_smn_reg_data *regdata,
+			      uint64_t smn_addr, int i)
+{
+	regdata->addr =
+		smn_addr + adev->asic_funcs->encode_ext_smn_addressing(i);
+	regdata->value = RREG32_PCIE_EXT(regdata->addr);
+}
+
+#define smnreg_0x1A340218	0x1A340218
+#define smnreg_0x1A3402E4	0x1A3402E4
+#define smnreg_0x1A340294	0x1A340294
+#define smreg_0x1A380088	0x1A380088
+
+#define NUM_PCIE_SMN_REGS	14
+
+static struct aqua_reg_list pcie_reg_addrs[] = {
+	{ smnreg_0x1A340218, 1, 0 },
+	{ smnreg_0x1A3402E4, 1, 0 },
+	{ smnreg_0x1A340294, 6, DW_ADDR_INCR },
+	{ smreg_0x1A380088, 6, DW_ADDR_INCR },
+};
+
+static ssize_t aqua_vanjaram_read_pcie_state(struct amdgpu_device *adev,
+					     void *buf, size_t max_size)
+{
+	struct amdgpu_reg_state_pcie_v1_0 *pcie_reg_state;
+	uint32_t start_addr, incrx, num_regs, szbuf;
+	struct amdgpu_regs_pcie_v1_0 *pcie_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	struct pci_dev *us_pdev, *ds_pdev;
+	int aer_cap, r, n;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+
+	pcie_reg_state = (struct amdgpu_reg_state_pcie_v1_0 *)buf;
+
+	szbuf = sizeof(*pcie_reg_state) +
+		amdgpu_reginst_size(1, sizeof(*pcie_regs), NUM_PCIE_SMN_REGS);
+	/* Only one instance of pcie regs */
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+
+	pcie_regs = (struct amdgpu_regs_pcie_v1_0 *)((uint8_t *)buf +
+						     sizeof(*pcie_reg_state));
+	pcie_regs->inst_header.instance = 0;
+	pcie_regs->inst_header.state = AMDGPU_INST_S_OK;
+	pcie_regs->inst_header.num_smn_regs = NUM_PCIE_SMN_REGS;
+
+	reg_data = pcie_regs->smn_reg_values;
+
+	for (r = 0; r < ARRAY_SIZE(pcie_reg_addrs); r++) {
+		start_addr = pcie_reg_addrs[r].start_addr;
+		incrx = pcie_reg_addrs[r].incrx;
+		num_regs = pcie_reg_addrs[r].num_regs;
+		for (n = 0; n < num_regs; n++) {
+			aqua_read_smn(adev, reg_data, start_addr + n * incrx);
+			++reg_data;
+		}
+	}
+
+	ds_pdev = pci_upstream_bridge(adev->pdev);
+	us_pdev = pci_upstream_bridge(ds_pdev);
+
+	pcie_capability_read_word(us_pdev, PCI_EXP_DEVSTA,
+				  &pcie_regs->device_status);
+	pcie_capability_read_word(us_pdev, PCI_EXP_LNKSTA,
+				  &pcie_regs->link_status);
+
+	aer_cap = pci_find_ext_capability(us_pdev, PCI_EXT_CAP_ID_ERR);
+	if (aer_cap) {
+		pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_COR_STATUS,
+				      &pcie_regs->pcie_corr_err_status);
+		pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_UNCOR_STATUS,
+				      &pcie_regs->pcie_uncorr_err_status);
+	}
+
+	pci_read_config_dword(us_pdev, PCI_PRIMARY_BUS,
+			      &pcie_regs->sub_bus_number_latency);
+
+	pcie_reg_state->common_header.structure_size = szbuf;
+	pcie_reg_state->common_header.format_revision = 1;
+	pcie_reg_state->common_header.content_revision = 0;
+	pcie_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_PCIE;
+	pcie_reg_state->common_header.num_instances = 1;
+
+	return pcie_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x11A00050	0x11A00050
+#define smnreg_0x11A00180	0x11A00180
+#define smnreg_0x11A00070	0x11A00070
+#define smnreg_0x11A00200	0x11A00200
+#define smnreg_0x11A0020C	0x11A0020C
+#define smnreg_0x11A00210	0x11A00210
+#define smnreg_0x11A00108	0x11A00108
+
+#define XGMI_LINK_REG(smnreg, l) ((smnreg) | (l << 20))
+
+#define NUM_XGMI_SMN_REGS 25
+
+static struct aqua_reg_list xgmi_reg_addrs[] = {
+	{ smnreg_0x11A00050, 1, 0 },
+	{ smnreg_0x11A00180, 16, DW_ADDR_INCR },
+	{ smnreg_0x11A00070, 4, DW_ADDR_INCR },
+	{ smnreg_0x11A00200, 1, 0 },
+	{ smnreg_0x11A0020C, 1, 0 },
+	{ smnreg_0x11A00210, 1, 0 },
+	{ smnreg_0x11A00108, 1, 0 },
+};
+
+static ssize_t aqua_vanjaram_read_xgmi_state(struct amdgpu_device *adev,
+					     void *buf, size_t max_size)
+{
+	struct amdgpu_reg_state_xgmi_v1_0 *xgmi_reg_state;
+	uint32_t start_addr, incrx, num_regs, szbuf;
+	struct amdgpu_regs_xgmi_v1_0 *xgmi_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	const int max_xgmi_instances = 8;
+	int inst = 0, i, j, r, n;
+	const int xgmi_inst = 2;
+	void *p;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+
+	xgmi_reg_state = (struct amdgpu_reg_state_xgmi_v1_0 *)buf;
+
+	szbuf = sizeof(*xgmi_reg_state) +
+		amdgpu_reginst_size(max_xgmi_instances, sizeof(*xgmi_regs),
+				    NUM_XGMI_SMN_REGS);
+	/* Only one instance of pcie regs */
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+
+	p = &xgmi_reg_state->xgmi_state_regs[0];
+	for_each_inst(i, adev->aid_mask) {
+		for (j = 0; j < xgmi_inst; ++j) {
+			xgmi_regs = (struct amdgpu_regs_xgmi_v1_0 *)p;
+			xgmi_regs->inst_header.instance = inst++;
+
+			xgmi_regs->inst_header.state = AMDGPU_INST_S_OK;
+			xgmi_regs->inst_header.num_smn_regs = NUM_XGMI_SMN_REGS;
+
+			reg_data = xgmi_regs->smn_reg_values;
+
+			for (r = 0; r < ARRAY_SIZE(xgmi_reg_addrs); r++) {
+				start_addr = xgmi_reg_addrs[r].start_addr;
+				incrx = xgmi_reg_addrs[r].incrx;
+				num_regs = xgmi_reg_addrs[r].num_regs;
+
+				for (n = 0; n < num_regs; n++) {
+					aqua_read_smn_ext(
+						adev, reg_data,
+						XGMI_LINK_REG(start_addr, j) +
+							n * incrx,
+						i);
+					++reg_data;
+				}
+			}
+			p = reg_data;
+		}
+	}
+
+	xgmi_reg_state->common_header.structure_size = szbuf;
+	xgmi_reg_state->common_header.format_revision = 1;
+	xgmi_reg_state->common_header.content_revision = 0;
+	xgmi_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_XGMI;
+	xgmi_reg_state->common_header.num_instances = max_xgmi_instances;
+
+	return xgmi_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x11C00070	0x11C00070
+#define smnreg_0x11C00210	0x11C00210
+
+static struct aqua_reg_list wafl_reg_addrs[] = {
+	{ smnreg_0x11C00070, 4, DW_ADDR_INCR },
+	{ smnreg_0x11C00210, 1, 0 },
+};
+
+#define WAFL_LINK_REG(smnreg, l) ((smnreg) | (l << 20))
+
+#define NUM_WAFL_SMN_REGS 5
+
+static ssize_t aqua_vanjaram_read_wafl_state(struct amdgpu_device *adev,
+					     void *buf, size_t max_size)
+{
+	struct amdgpu_reg_state_wafl_v1_0 *wafl_reg_state;
+	uint32_t start_addr, incrx, num_regs, szbuf;
+	struct amdgpu_regs_wafl_v1_0 *wafl_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	const int max_wafl_instances = 8;
+	int inst = 0, i, j, r, n;
+	const int wafl_inst = 2;
+	void *p;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+
+	wafl_reg_state = (struct amdgpu_reg_state_wafl_v1_0 *)buf;
+
+	szbuf = sizeof(*wafl_reg_state) +
+		amdgpu_reginst_size(max_wafl_instances, sizeof(*wafl_regs),
+				    NUM_WAFL_SMN_REGS);
+
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+
+	p = &wafl_reg_state->wafl_state_regs[0];
+	for_each_inst(i, adev->aid_mask) {
+		for (j = 0; j < wafl_inst; ++j) {
+			wafl_regs = (struct amdgpu_regs_wafl_v1_0 *)p;
+			wafl_regs->inst_header.instance = inst++;
+
+			wafl_regs->inst_header.state = AMDGPU_INST_S_OK;
+			wafl_regs->inst_header.num_smn_regs = NUM_WAFL_SMN_REGS;
+
+			reg_data = wafl_regs->smn_reg_values;
+
+			for (r = 0; r < ARRAY_SIZE(wafl_reg_addrs); r++) {
+				start_addr = wafl_reg_addrs[r].start_addr;
+				incrx = wafl_reg_addrs[r].incrx;
+				num_regs = wafl_reg_addrs[r].num_regs;
+				for (n = 0; n < num_regs; n++) {
+					aqua_read_smn_ext(
+						adev, reg_data,
+						WAFL_LINK_REG(start_addr, j) +
+							n * incrx,
+						i);
+					++reg_data;
+				}
+			}
+			p = reg_data;
+		}
+	}
+
+	wafl_reg_state->common_header.structure_size = szbuf;
+	wafl_reg_state->common_header.format_revision = 1;
+	wafl_reg_state->common_header.content_revision = 0;
+	wafl_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_WAFL;
+	wafl_reg_state->common_header.num_instances = max_wafl_instances;
+
+	return wafl_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x1B311060 0x1B311060
+#define smnreg_0x1B411060 0x1B411060
+#define smnreg_0x1B511060 0x1B511060
+#define smnreg_0x1B611060 0x1B611060
+
+#define smnreg_0x1C307120 0x1C307120
+#define smnreg_0x1C317120 0x1C317120
+
+#define smnreg_0x1C320830 0x1C320830
+#define smnreg_0x1C380830 0x1C380830
+#define smnreg_0x1C3D0830 0x1C3D0830
+#define smnreg_0x1C420830 0x1C420830
+
+#define smnreg_0x1C320100 0x1C320100
+#define smnreg_0x1C380100 0x1C380100
+#define smnreg_0x1C3D0100 0x1C3D0100
+#define smnreg_0x1C420100 0x1C420100
+
+#define smnreg_0x1B310500 0x1B310500
+#define smnreg_0x1C300400 0x1C300400
+
+#define USR_CAKE_INCR 0x11000
+#define USR_LINK_INCR 0x100000
+#define USR_CP_INCR 0x10000
+
+#define NUM_USR_SMN_REGS	20
+
+struct aqua_reg_list usr_reg_addrs[] = {
+	{ smnreg_0x1B311060, 4, DW_ADDR_INCR },
+	{ smnreg_0x1B411060, 4, DW_ADDR_INCR },
+	{ smnreg_0x1B511060, 4, DW_ADDR_INCR },
+	{ smnreg_0x1B611060, 4, DW_ADDR_INCR },
+	{ smnreg_0x1C307120, 2, DW_ADDR_INCR },
+	{ smnreg_0x1C317120, 2, DW_ADDR_INCR },
+};
+
+#define NUM_USR1_SMN_REGS	46
+struct aqua_reg_list usr1_reg_addrs[] = {
+	{ smnreg_0x1C320830, 6, USR_CAKE_INCR },
+	{ smnreg_0x1C380830, 5, USR_CAKE_INCR },
+	{ smnreg_0x1C3D0830, 5, USR_CAKE_INCR },
+	{ smnreg_0x1C420830, 4, USR_CAKE_INCR },
+	{ smnreg_0x1C320100, 6, USR_CAKE_INCR },
+	{ smnreg_0x1C380100, 5, USR_CAKE_INCR },
+	{ smnreg_0x1C3D0100, 5, USR_CAKE_INCR },
+	{ smnreg_0x1C420100, 4, USR_CAKE_INCR },
+	{ smnreg_0x1B310500, 4, USR_LINK_INCR },
+	{ smnreg_0x1C300400, 2, USR_CP_INCR },
+};
+
+static ssize_t aqua_vanjaram_read_usr_state(struct amdgpu_device *adev,
+					    void *buf, size_t max_size,
+					    int reg_state)
+{
+	uint32_t start_addr, incrx, num_regs, szbuf, num_smn;
+	struct amdgpu_reg_state_usr_v1_0 *usr_reg_state;
+	struct amdgpu_regs_usr_v1_0 *usr_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	const int max_usr_instances = 4;
+	struct aqua_reg_list *reg_addrs;
+	int inst = 0, i, n, r, arr_size;
+	void *p;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+
+	switch (reg_state) {
+	case AMDGPU_REG_STATE_TYPE_USR:
+		arr_size = ARRAY_SIZE(usr_reg_addrs);
+		reg_addrs = usr_reg_addrs;
+		num_smn = NUM_USR_SMN_REGS;
+		break;
+	case AMDGPU_REG_STATE_TYPE_USR_1:
+		arr_size = ARRAY_SIZE(usr1_reg_addrs);
+		reg_addrs = usr1_reg_addrs;
+		num_smn = NUM_USR1_SMN_REGS;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	usr_reg_state = (struct amdgpu_reg_state_usr_v1_0 *)buf;
+
+	szbuf = sizeof(*usr_reg_state) + amdgpu_reginst_size(max_usr_instances,
+							     sizeof(*usr_regs),
+							     num_smn);
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+
+	p = &usr_reg_state->usr_state_regs[0];
+	for_each_inst(i, adev->aid_mask) {
+		usr_regs = (struct amdgpu_regs_usr_v1_0 *)p;
+		usr_regs->inst_header.instance = inst++;
+		usr_regs->inst_header.state = AMDGPU_INST_S_OK;
+		usr_regs->inst_header.num_smn_regs = num_smn;
+		reg_data = usr_regs->smn_reg_values;
+
+		for (r = 0; r < arr_size; r++) {
+			start_addr = reg_addrs[r].start_addr;
+			incrx = reg_addrs[r].incrx;
+			num_regs = reg_addrs[r].num_regs;
+			for (n = 0; n < num_regs; n++) {
+				aqua_read_smn_ext(adev, reg_data,
+						  start_addr + n * incrx, i);
+				reg_data++;
+			}
+		}
+		p = reg_data;
+	}
+
+	usr_reg_state->common_header.structure_size = szbuf;
+	usr_reg_state->common_header.format_revision = 1;
+	usr_reg_state->common_header.content_revision = 0;
+	usr_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_USR;
+	usr_reg_state->common_header.num_instances = max_usr_instances;
+
+	return usr_reg_state->common_header.structure_size;
+}
+
+ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
+				    enum amdgpu_reg_state reg_state, void *buf,
+				    size_t max_size)
+{
+	ssize_t size;
+
+	switch (reg_state) {
+	case AMDGPU_REG_STATE_TYPE_PCIE:
+		size = aqua_vanjaram_read_pcie_state(adev, buf, max_size);
+		break;
+	case AMDGPU_REG_STATE_TYPE_XGMI:
+		size = aqua_vanjaram_read_xgmi_state(adev, buf, max_size);
+		break;
+	case AMDGPU_REG_STATE_TYPE_WAFL:
+		size = aqua_vanjaram_read_wafl_state(adev, buf, max_size);
+		break;
+	case AMDGPU_REG_STATE_TYPE_USR:
+		size = aqua_vanjaram_read_usr_state(adev, buf, max_size,
+						    AMDGPU_REG_STATE_TYPE_USR);
+		break;
+	case AMDGPU_REG_STATE_TYPE_USR_1:
+		size = aqua_vanjaram_read_usr_state(
+			adev, buf, max_size, AMDGPU_REG_STATE_TYPE_USR_1);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return size;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 2c221000782c..a33e890c70d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -395,7 +395,6 @@ static void atom_skip_src_int(atom_exec_context *ctx, uint8_t attr, int *ptr)
 			(*ptr)++;
 			return;
 		}
-		return;
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 3ee219aa2891..7672abe6c140 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -28,6 +28,7 @@
 
 #include <acpi/video.h>
 
+#include <drm/drm_edid.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_connectors.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index bb666cb7522e..587ee632a3b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -21,6 +21,7 @@
  *
  */
 
+#include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 7af277f61cca..f22ec27365bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -21,6 +21,7 @@
  *
  */
 
+#include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 143efc37a17f..4dbe9b3259b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -23,6 +23,7 @@
 
 #include <linux/pci.h>
 
+#include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index adeddfb7ff12..05bcce23385e 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -21,6 +21,7 @@
  *
  */
 
+#include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index c8a3bf01743f..73f6d7e72c73 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -6593,7 +6593,8 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
 #endif
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+			    prop->allow_tunneling);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
 	mqd->cp_hqd_pq_control = tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 0c6133cc5e57..2fbcd9765980 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -67,6 +67,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
@@ -89,6 +90,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
 
+static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
+	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
+};
+
 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
 {
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
@@ -289,6 +294,9 @@ static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
 
 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
 {
+	if (amdgpu_sriov_vf(adev))
+		return;
+
 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
 	case IP_VERSION(11, 0, 1):
 	case IP_VERSION(11, 0, 4):
@@ -304,6 +312,10 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
 	default:
 		break;
 	}
+	soc15_program_register_sequence(adev,
+					golden_settings_gc_11_0,
+					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
+
 }
 
 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
@@ -419,7 +431,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 		cpu_ptr = &adev->wb.wb[index];
 
-		r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
+		r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 		if (r) {
 			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 			goto err1;
@@ -556,7 +568,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
 	}
 
 	if (!amdgpu_sriov_vf(adev)) {
-		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
+		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
+		    adev->pdev->revision == 0xCE)
+			snprintf(fw_name, sizeof(fw_name), "amdgpu/gc_11_0_0_rlc_1.bin");
+		else
+			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
 		if (err)
 			goto out;
@@ -3831,7 +3847,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+			    prop->allow_tunneling);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
 	mqd->cp_hqd_pq_control = tmp;
@@ -4457,11 +4474,43 @@ static int gfx_v11_0_wait_for_idle(void *handle)
 	return -ETIMEDOUT;
 }
 
+static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+					     int req)
+{
+	u32 i, tmp, val;
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		/* Request with MeId=2, PipeId=0 */
+		tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+		tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
+
+		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+		if (req) {
+			if (val == tmp)
+				break;
+		} else {
+			tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
+					    REQUEST, 1);
+
+			/* unlocked or locked by firmware */
+			if (val != tmp)
+				break;
+		}
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int gfx_v11_0_soft_reset(void *handle)
 {
 	u32 grbm_soft_reset = 0;
 	u32 tmp;
-	int i, j, k;
+	int r, i, j, k;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
@@ -4501,6 +4550,13 @@ static int gfx_v11_0_soft_reset(void *handle)
 		}
 	}
 
+	/* Try to acquire the gfx mutex before access to CP_VMID_RESET */
+	r = gfx_v11_0_request_gfx_index_mutex(adev, 1);
+	if (r) {
+		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
+		return r;
+	}
+
 	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
 
 	// Read CP_VMID_RESET register three times.
@@ -4509,6 +4565,13 @@ static int gfx_v11_0_soft_reset(void *handle)
 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
 
+	/* release the gfx mutex */
+	r = gfx_v11_0_request_gfx_index_mutex(adev, 0);
+	if (r) {
+		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
+		return r;
+	}
+
 	for (i = 0; i < adev->usec_timeout; i++) {
 		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
 		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 885ebd703260..1943beb135c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -883,8 +883,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 16,
-					AMDGPU_IB_POOL_DIRECT, &ib);
+
+	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 	if (r)
 		goto err1;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e3ff6e46f3f7..69c500910746 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1039,8 +1039,8 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 16,
-					AMDGPU_IB_POOL_DIRECT, &ib);
+
+	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 	if (r)
 		goto err1;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 40d06d32bb74..131cddbdda0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -297,8 +297,8 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 16,
-			  AMDGPU_IB_POOL_DIRECT, &ib);
+
+	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 	if (r)
 		goto err1;
 
@@ -3828,8 +3828,8 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
 	/* the caller should make sure initialize value of
 	 * err_data->ue_count and err_data->ce_count
 	 */
-	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
-	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count);
+	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count);
 }
 
 static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
@@ -3882,150 +3882,6 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
 	mutex_unlock(&adev->grbm_idx_mutex);
 }
 
-static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev,
-					int xcc_id)
-{
-	uint32_t data;
-
-	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS);
-	if (data) {
-		dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data);
-		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3);
-	}
-
-	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS);
-	if (data) {
-		dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data);
-		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3);
-	}
-
-	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
-				regVML2_WALKER_MEM_ECC_STATUS);
-	if (data) {
-		dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data);
-		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS,
-				0x3);
-	}
-}
-
-static void gfx_v9_4_3_log_cu_timeout_status(struct amdgpu_device *adev,
-					uint32_t status, int xcc_id)
-{
-	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
-	uint32_t i, simd, wave;
-	uint32_t wave_status;
-	uint32_t wave_pc_lo, wave_pc_hi;
-	uint32_t wave_exec_lo, wave_exec_hi;
-	uint32_t wave_inst_dw0, wave_inst_dw1;
-	uint32_t wave_ib_sts;
-
-	for (i = 0; i < 32; i++) {
-		if (!((i << 1) & status))
-			continue;
-
-		simd = i / cu_info->max_waves_per_simd;
-		wave = i % cu_info->max_waves_per_simd;
-
-		wave_status = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS);
-		wave_pc_lo = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO);
-		wave_pc_hi = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI);
-		wave_exec_lo =
-			wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO);
-		wave_exec_hi =
-			wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI);
-		wave_inst_dw0 =
-			wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0);
-		wave_inst_dw1 =
-			wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1);
-		wave_ib_sts = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS);
-
-		dev_info(
-			adev->dev,
-			"\t SIMD %d, Wave %d: status 0x%x, pc 0x%llx, exec 0x%llx, inst 0x%llx, ib_sts 0x%x\n",
-			simd, wave, wave_status,
-			((uint64_t)wave_pc_hi << 32 | wave_pc_lo),
-			((uint64_t)wave_exec_hi << 32 | wave_exec_lo),
-			((uint64_t)wave_inst_dw1 << 32 | wave_inst_dw0),
-			wave_ib_sts);
-	}
-}
-
-static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev,
-					int xcc_id)
-{
-	uint32_t se_idx, sh_idx, cu_idx;
-	uint32_t status;
-
-	mutex_lock(&adev->grbm_idx_mutex);
-	for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) {
-		for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) {
-			for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) {
-				gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx,
-							cu_idx, xcc_id);
-				status = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
-						      regSQ_TIMEOUT_STATUS);
-				if (status != 0) {
-					dev_info(
-						adev->dev,
-						"GFX Watchdog Timeout: SE %d, SH %d, CU %d\n",
-						se_idx, sh_idx, cu_idx);
-					gfx_v9_4_3_log_cu_timeout_status(
-						adev, status, xcc_id);
-				}
-				/* clear old status */
-				WREG32_SOC15(GC, GET_INST(GC, xcc_id),
-						regSQ_TIMEOUT_STATUS, 0);
-			}
-		}
-	}
-	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
-			xcc_id);
-	mutex_unlock(&adev->grbm_idx_mutex);
-}
-
-static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev,
-					void *ras_error_status, int xcc_id)
-{
-	gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id);
-	gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id);
-}
-
-static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev,
-					int xcc_id)
-{
-	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3);
-	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3);
-	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3);
-}
-
-static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev,
-					int xcc_id)
-{
-	uint32_t se_idx, sh_idx, cu_idx;
-
-	mutex_lock(&adev->grbm_idx_mutex);
-	for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) {
-		for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) {
-			for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) {
-				gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx,
-							cu_idx, xcc_id);
-				WREG32_SOC15(GC, GET_INST(GC, xcc_id),
-						regSQ_TIMEOUT_STATUS, 0);
-			}
-		}
-	}
-	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
-			xcc_id);
-	mutex_unlock(&adev->grbm_idx_mutex);
-}
-
-static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev,
-					void *ras_error_status, int xcc_id)
-{
-	gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id);
-	gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id);
-}
-
 static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev,
 					void *ras_error_status, int xcc_id)
 {
@@ -4067,16 +3923,6 @@ static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev)
 	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count);
 }
 
-static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev)
-{
-	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status);
-}
-
-static void gfx_v9_4_3_reset_ras_error_status(struct amdgpu_device *adev)
-{
-	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_status);
-}
-
 static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev)
 {
 	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer);
@@ -4394,8 +4240,6 @@ struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = {
 struct amdgpu_ras_block_hw_ops  gfx_v9_4_3_ras_ops = {
 	.query_ras_error_count = &gfx_v9_4_3_query_ras_error_count,
 	.reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count,
-	.query_ras_error_status = &gfx_v9_4_3_query_ras_error_status,
-	.reset_ras_error_status = &gfx_v9_4_3_reset_ras_error_status,
 };
 
 struct amdgpu_gfx_ras gfx_v9_4_3_ras = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 53a2ba5fcf4b..22175da0e16a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -102,7 +102,9 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 		WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 			min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
 
-		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+		if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+				       AMD_APU_IS_RENOIR |
+				       AMD_APU_IS_GREEN_SARDINE))
 		       /*
 			* Raven2 has a HW issue that it is unable to use the
 			* vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index 55423ff1bb49..95d06da544e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -139,7 +139,9 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev,
 			WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 				min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
 
-			if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+			if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+					       AMD_APU_IS_RENOIR |
+					       AMD_APU_IS_GREEN_SARDINE))
 			       /*
 				* Raven2 has a HW issue that it is unable to use the
 				* vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index a5a05c16c10d..6c5185608854 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -1041,6 +1041,10 @@ static int gmc_v10_0_hw_fini(void *handle)
 
 	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
 
+	if (adev->gmc.ecc_irq.funcs &&
+		amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+		amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 23d7b548d13f..c9c653cfc765 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -941,6 +941,11 @@ static int gmc_v11_0_hw_fini(void *handle)
 	}
 
 	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+	if (adev->gmc.ecc_irq.funcs &&
+		amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+		amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
 	gmc_v11_0_gart_disable(adev);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2ac5820e9c92..f9039d64ff2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -883,7 +883,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 * GRBM interface.
 	 */
 	if ((vmhub == AMDGPU_GFXHUB(0)) &&
-	    (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
+	    (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
 		RREG32_NO_KIQ(req);
 
 	for (j = 0; j < adev->usec_timeout; j++) {
@@ -2380,6 +2380,10 @@ static int gmc_v9_0_hw_fini(void *handle)
 
 	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
 
+	if (adev->gmc.ecc_irq.funcs &&
+		amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+		amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index 49e934975719..4db6bb73ead4 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -129,6 +129,11 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev,
 {
 	int data;
 
+	if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2)) {
+		/* Default enabled */
+		*flags |= AMD_CG_SUPPORT_HDP_MGCG;
+		return;
+	}
 	/* AMD_CG_SUPPORT_HDP_LS */
 	data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
 	if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
index 9df011323d4b..6ede85b28cc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -155,13 +155,6 @@ static int jpeg_v4_0_5_hw_init(void *handle)
 	struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
 	int r;
 
-	adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
-				(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
-
-	WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
-		ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
-		VCN_JPEG_DB_CTRL__EN_MASK);
-
 	r = amdgpu_ring_test_helper(ring);
 	if (r)
 		return r;
@@ -336,6 +329,14 @@ static int jpeg_v4_0_5_start(struct amdgpu_device *adev)
 	if (adev->pm.dpm_enabled)
 		amdgpu_dpm_enable_jpeg(adev, true);
 
+	/* doorbell programming is done for every playback */
+	adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+	WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+		ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+		VCN_JPEG_DB_CTRL__EN_MASK);
+
 	/* disable power gating */
 	r = jpeg_v4_0_5_disable_static_power_gating(adev);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 4dfec56e1b7f..26d71a22395d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -408,6 +408,8 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
 	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
 	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
 	mes_set_hw_res_pkt.oversubscription_timer = 50;
+	mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+	mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
 
 	return mes_v11_0_submit_pkt_and_poll_completion(mes,
 			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 843219a91736..e3ddd22aa172 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -96,7 +96,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 		     min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
 
-	if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+	if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+			       AMD_APU_IS_RENOIR |
+			       AMD_APU_IS_GREEN_SARDINE))
 		/*
 		 * Raven2 has a HW issue that it is unable to use the vram which
 		 * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index 9b0146732e13..fb53aacdcba2 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -652,8 +652,8 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev,
 					AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
 					&ue_count);
 
-	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
-	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count);
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count);
 }
 
 static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
index 676ab1d20d2f..1f52b4b1db03 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
@@ -259,17 +259,17 @@ const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg = {
 
 static void nbio_v7_11_init_registers(struct amdgpu_device *adev)
 {
-/*	uint32_t def, data;
+	uint32_t def, data;
+
+	def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3);
+	data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
+				CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+	data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
+				CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
 
-		def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3);
-		data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
-			CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
-		data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
-			CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data);
 
-		if (def != data)
-			WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data);
-*/
 }
 
 static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 23f26f8caad4..25a3da83e0fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -611,11 +611,6 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device
 
 		dev_info(adev->dev, "RAS controller interrupt triggered "
 					"by NBIF error\n");
-
-		/* ras_controller_int is dedicated for nbif ras error,
-		 * not the global interrupt for sync flood
-		 */
-		amdgpu_ras_reset_gpu(adev);
 	}
 
 	amdgpu_ras_error_data_fini(&err_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 3cf4684d0d3f..df1844d0800f 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -60,7 +60,7 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin");
 #define GFX_CMD_USB_PD_USE_LFB 0x480
 
 /* Retry times for vmbx ready wait */
-#define PSP_VMBX_POLLING_LIMIT 20000
+#define PSP_VMBX_POLLING_LIMIT 3000
 
 /* VBIOS gfl defines */
 #define MBOX_READY_MASK 0x80000000
@@ -161,14 +161,18 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp)
 static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
 {
 	struct amdgpu_device *adev = psp->adev;
-	int retry_loop, ret;
+	int retry_loop, retry_cnt, ret;
 
+	retry_cnt =
+		(amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) ?
+			PSP_VMBX_POLLING_LIMIT :
+			10;
 	/* Wait for bootloader to signify that it is ready having bit 31 of
 	 * C2PMSG_35 set to 1. All other bits are expected to be cleared.
 	 * If there is an error in processing command, bits[7:0] will be set.
 	 * This is applicable for PSP v13.0.6 and newer.
 	 */
-	for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) {
+	for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) {
 		ret = psp_wait_for(
 			psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
 			0x80000000, 0xffffffff, false);
@@ -821,7 +825,7 @@ static int psp_v13_0_query_boot_status(struct psp_context *psp)
 	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))
 		return 0;
 
-	if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10007)
+	if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10109)
 		return 0;
 
 	for_each_inst(i, inst_mask) {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 45377a175250..8d5d86675a7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -813,12 +813,12 @@ static int sdma_v2_4_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;
 
+	adev->sdma.num_instances = SDMA_MAX_INSTANCE;
+
 	r = sdma_v2_4_init_microcode(adev);
 	if (r)
 		return r;
 
-	adev->sdma.num_instances = SDMA_MAX_INSTANCE;
-
 	sdma_v2_4_set_ring_funcs(adev);
 	sdma_v2_4_set_buffer_funcs(adev);
 	sdma_v2_4_set_vm_pte_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 0f24af6f2810..2d688dca26be 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -2156,7 +2156,7 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev,
 					AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
 					&ue_count);
 
-	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count);
 }
 
 static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 83c240f741b5..0058f3f7cf6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1643,6 +1643,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
 		*flags |= AMD_CG_SUPPORT_SDMA_LS;
 }
 
+static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+	 * disallow GFXOFF in some cases leading to
+	 * hangs in SDMA.  Disallow GFXOFF while SDMA is active.
+	 * We can probably just limit this to 5.2.3,
+	 * but it shouldn't hurt for other parts since
+	 * this GFXOFF will be disallowed anyway when SDMA is
+	 * active, this just makes it explicit.
+	 */
+	amdgpu_gfx_off_ctrl(adev, false);
+}
+
+static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+	 * disallow GFXOFF in some cases leading to
+	 * hangs in SDMA.  Allow GFXOFF when SDMA is complete.
+	 */
+	amdgpu_gfx_off_ctrl(adev, true);
+}
+
 const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
 	.name = "sdma_v5_2",
 	.early_init = sdma_v5_2_early_init,
@@ -1690,6 +1716,8 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
 	.test_ib = sdma_v5_2_ring_test_ib,
 	.insert_nop = sdma_v5_2_ring_insert_nop,
 	.pad_ib = sdma_v5_2_ring_pad_ib,
+	.begin_use = sdma_v5_2_ring_begin_use,
+	.end_use = sdma_v5_2_ring_end_use,
 	.emit_wreg = sdma_v5_2_ring_emit_wreg,
 	.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index d4b8d62f4294..15033efec2ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -902,6 +902,7 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
 	.pre_asic_init = &soc15_pre_asic_init,
 	.query_video_codecs = &soc15_query_video_codecs,
 	.encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing,
+	.get_reg_state = &aqua_vanjaram_get_reg_state,
 };
 
 static int soc15_common_early_init(void *handle)
@@ -1161,6 +1162,11 @@ static int soc15_common_early_init(void *handle)
 			AMD_PG_SUPPORT_VCN_DPG |
 			AMD_PG_SUPPORT_JPEG;
 		adev->external_rev_id = adev->rev_id + 0x46;
+		/* GC 9.4.3 uses MMIO register region hole at a different offset */
+		if (!amdgpu_sriov_vf(adev)) {
+			adev->rmmio_remap.reg_offset = 0x1A000;
+			adev->rmmio_remap.bus_addr = adev->rmmio_base + 0x1A000;
+		}
 		break;
 	default:
 		/* FIXME: not supported yet */
@@ -1418,11 +1424,14 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
 	if (amdgpu_sriov_vf(adev))
 		*flags = 0;
 
-	adev->nbio.funcs->get_clockgating_state(adev, flags);
+	if (adev->nbio.funcs && adev->nbio.funcs->get_clockgating_state)
+		adev->nbio.funcs->get_clockgating_state(adev, flags);
 
-	adev->hdp.funcs->get_clock_gating_state(adev, flags);
+	if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state)
+		adev->hdp.funcs->get_clock_gating_state(adev, flags);
 
-	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) {
+	if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) &&
+	    (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))) {
 		/* AMD_CG_SUPPORT_DRM_MGCG */
 		data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0));
 		if (!(data & 0x01000000))
@@ -1435,9 +1444,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
 	}
 
 	/* AMD_CG_SUPPORT_ROM_MGCG */
-	adev->smuio.funcs->get_clock_gating_state(adev, flags);
+	if (adev->smuio.funcs && adev->smuio.funcs->get_clock_gating_state)
+		adev->smuio.funcs->get_clock_gating_state(adev, flags);
 
-	adev->df.funcs->get_clockgating_state(adev, flags);
+	if (adev->df.funcs && adev->df.funcs->get_clockgating_state)
+		adev->df.funcs->get_clockgating_state(adev, flags);
 }
 
 static int soc15_common_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index eac54042c6c0..1444b7765e4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -27,6 +27,7 @@
 #include "nbio_v6_1.h"
 #include "nbio_v7_0.h"
 #include "nbio_v7_4.h"
+#include "amdgpu_reg_state.h"
 
 extern const struct amdgpu_ip_block_version vega10_common_ip_block;
 
@@ -114,6 +115,9 @@ int aldebaran_reg_base_init(struct amdgpu_device *adev);
 void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev);
 u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id);
 int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev);
+ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
+				    enum amdgpu_reg_state reg_state, void *buf,
+				    size_t max_size);
 
 void vega10_doorbell_index_init(struct amdgpu_device *adev);
 void vega20_doorbell_index_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index e9c2ff74f0bc..7458a218e89d 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -26,6 +26,7 @@
 #include "amdgpu.h"
 #include "umc/umc_12_0_0_offset.h"
 #include "umc/umc_12_0_0_sh_mask.h"
+#include "mp/mp_13_0_6_sh_mask.h"
 
 const uint32_t
 	umc_v12_0_channel_idx_tbl[]
@@ -88,16 +89,26 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
 		umc_v12_0_reset_error_count_per_channel, NULL);
 }
 
-bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status)
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
 {
+	if (amdgpu_ras_is_poison_mode_supported(adev) &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1))
+		return true;
+
 	return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
 		(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
 		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
 		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1));
 }
 
-bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
 {
+	if (amdgpu_ras_is_poison_mode_supported(adev) &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1))
+		return false;
+
 	return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
 		(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
 		(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
@@ -105,7 +116,7 @@ bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
 		/* Identify data parity error in replay mode */
 		((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 ||
 		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) &&
-		!(umc_v12_0_is_uncorrectable_error(mc_umc_status)))));
+		!(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)))));
 }
 
 static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
@@ -124,7 +135,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
 	mc_umc_status =
 		RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
 
-	if (umc_v12_0_is_correctable_error(mc_umc_status))
+	if (umc_v12_0_is_correctable_error(adev, mc_umc_status))
 		*error_count += 1;
 }
 
@@ -142,7 +153,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev
 	mc_umc_status =
 		RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
 
-	if (umc_v12_0_is_uncorrectable_error(mc_umc_status))
+	if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status))
 		*error_count += 1;
 }
 
@@ -166,8 +177,8 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev,
 	umc_v12_0_query_correctable_error_count(adev, umc_reg_offset, &ce_count);
 	umc_v12_0_query_uncorrectable_error_count(adev, umc_reg_offset, &ue_count);
 
-	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
-	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count);
+	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count);
 
 	return 0;
 }
@@ -360,6 +371,59 @@ static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev,
 	return 0;
 }
 
+static void umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
+					void *ras_error_status)
+{
+	amdgpu_mca_smu_log_ras_error(adev,
+		AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status);
+	amdgpu_mca_smu_log_ras_error(adev,
+		AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status);
+}
+
+static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
+					void *ras_error_status)
+{
+	struct ras_err_node *err_node;
+	uint64_t mc_umc_status;
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+	for_each_ras_error(err_node, err_data) {
+		mc_umc_status = err_node->err_info.err_addr.err_status;
+		if (!mc_umc_status)
+			continue;
+
+		if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)) {
+			uint64_t mca_addr, err_addr, mca_ipid;
+			uint32_t InstanceIdLo;
+			struct amdgpu_smuio_mcm_config_info *mcm_info;
+
+			mcm_info = &err_node->err_info.mcm_info;
+			mca_addr = err_node->err_info.err_addr.err_addr;
+			mca_ipid = err_node->err_info.err_addr.err_ipid;
+
+			err_addr =  REG_GET_FIELD(mca_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+			InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo);
+
+			dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n",
+				mca_ipid,
+				mcm_info->die_id,
+				MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
+				MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
+				err_addr);
+
+			umc_v12_0_convert_error_address(adev,
+				err_data, err_addr,
+				MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
+				MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
+				mcm_info->die_id);
+
+			/* Clear umc error address content */
+			memset(&err_node->err_info.err_addr,
+				0, sizeof(err_node->err_info.err_addr));
+		}
+	}
+}
+
 static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev)
 {
 	amdgpu_umc_loop_channels(adev,
@@ -386,4 +450,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
 	},
 	.err_cnt_init = umc_v12_0_err_cnt_init,
 	.query_ras_poison_mode = umc_v12_0_query_ras_poison_mode,
+	.ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count,
+	.ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
index b34b1e358f8b..e8de3a92251a 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -117,8 +117,12 @@
 		(pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \
 	} while (0)
 
-bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status);
-bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status);
+#define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \
+			(((_ipid_lo) >> 12) & 0xF))
+#define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7)
+
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
 
 extern const uint32_t
 	umc_v12_0_channel_idx_tbl[]
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 48bfcd0d558b..169ed400ee7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -100,6 +100,31 @@ static int vcn_v4_0_early_init(void *handle)
 	return amdgpu_vcn_early_init(adev);
 }
 
+static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+
+	fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+	fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+	fw_shared->sq.is_enabled = 1;
+
+	fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
+	fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
+		AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
+
+	if (amdgpu_ip_version(adev, VCN_HWIP, 0) ==
+	    IP_VERSION(4, 0, 2)) {
+		fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT;
+		fw_shared->drm_key_wa.method =
+			AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
+	}
+
+	if (amdgpu_vcnfw_log)
+		amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
+
+	return 0;
+}
+
 /**
  * vcn_v4_0_sw_init - sw init for VCN block
  *
@@ -124,8 +149,6 @@ static int vcn_v4_0_sw_init(void *handle)
 		return r;
 
 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-		volatile struct amdgpu_vcn4_fw_shared *fw_shared;
-
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
 
@@ -161,23 +184,7 @@ static int vcn_v4_0_sw_init(void *handle)
 		if (r)
 			return r;
 
-		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
-		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
-		fw_shared->sq.is_enabled = 1;
-
-		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
-		fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
-			AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
-
-		if (amdgpu_ip_version(adev, VCN_HWIP, 0) ==
-		    IP_VERSION(4, 0, 2)) {
-			fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT;
-			fw_shared->drm_key_wa.method =
-				AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
-		}
-
-		if (amdgpu_vcnfw_log)
-			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+		vcn_v4_0_fw_shared_init(adev, i);
 	}
 
 	if (amdgpu_sriov_vf(adev)) {
@@ -1273,6 +1280,9 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
 
+		// Must re/init fw_shared at beginning
+		vcn_v4_0_fw_shared_init(adev, i);
+
 		table_size = 0;
 
 		MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
index 174f13eff575..d20060a51e05 100644
--- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
@@ -96,6 +96,10 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe)
 		adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl;
 
 		amdgpu_vpe_psp_update_sram(adev);
+
+		/* Config DPM */
+		amdgpu_vpe_configure_dpm(vpe);
+
 		return 0;
 	}
 
@@ -128,6 +132,8 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe)
 	}
 
 	vpe_v6_1_halt(vpe, false);
+	/* Config DPM */
+	amdgpu_vpe_configure_dpm(vpe);
 
 	return 0;
 }
@@ -264,6 +270,15 @@ static int vpe_v6_1_set_regs(struct amdgpu_vpe *vpe)
 	vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI;
 	vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT;
 
+	vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2;
+	vpe->regs.dpm_pratio = regVPEC_QUEUE6_DUMMY4;
+	vpe->regs.dpm_request_interval = regVPEC_QUEUE5_DUMMY3;
+	vpe->regs.dpm_decision_threshold = regVPEC_QUEUE5_DUMMY4;
+	vpe->regs.dpm_busy_clamp_threshold = regVPEC_QUEUE7_DUMMY2;
+	vpe->regs.dpm_idle_clamp_threshold = regVPEC_QUEUE7_DUMMY3;
+	vpe->regs.dpm_request_lv = regVPEC_QUEUE7_DUMMY1;
+	vpe->regs.context_indicator = regVPEC_QUEUE6_DUMMY3;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index d7cd5fa313ff..df75863393fc 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -2069,7 +2069,7 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
 };
 
 static const uint32_t cwsr_trap_gfx10_hex[] = {
-	0xbf820001, 0xbf820220,
+	0xbf820001, 0xbf820221,
 	0xb0804004, 0xb978f802,
 	0x8a78ff78, 0x00020006,
 	0xb97bf803, 0x876eff78,
@@ -2118,391 +2118,391 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xbf900004, 0xbf8cc07f,
 	0x877aff7f, 0x04000000,
 	0x8f7a857a, 0x886d7a6d,
-	0xbefa037e, 0x877bff7f,
-	0x0000ffff, 0xbefe03c1,
-	0xbeff03c1, 0xdc5f8000,
-	0x007a0000, 0x7e000280,
-	0xbefe037a, 0xbeff037b,
-	0xb97b02dc, 0x8f7b997b,
-	0xb97a3a05, 0x807a817a,
-	0xbf0d997b, 0xbf850002,
-	0x8f7a897a, 0xbf820001,
-	0x8f7a8a7a, 0xb97b1e06,
-	0x8f7b8a7b, 0x807a7b7a,
+	0x7e008200, 0xbefa037e,
 	0x877bff7f, 0x0000ffff,
-	0x807aff7a, 0x00000200,
-	0x807a7e7a, 0x827b807b,
-	0xd7610000, 0x00010870,
-	0xd7610000, 0x00010a71,
-	0xd7610000, 0x00010c72,
-	0xd7610000, 0x00010e73,
-	0xd7610000, 0x00011074,
-	0xd7610000, 0x00011275,
-	0xd7610000, 0x00011476,
-	0xd7610000, 0x00011677,
-	0xd7610000, 0x00011a79,
-	0xd7610000, 0x00011c7e,
-	0xd7610000, 0x00011e7f,
-	0xbefe03ff, 0x00003fff,
-	0xbeff0380, 0xdc5f8040,
-	0x007a0000, 0xd760007a,
-	0x00011d00, 0xd760007b,
-	0x00011f00, 0xbefe037a,
-	0xbeff037b, 0xbef4037e,
-	0x8775ff7f, 0x0000ffff,
-	0x8875ff75, 0x00040000,
-	0xbef60380, 0xbef703ff,
-	0x10807fac, 0xbef1037c,
-	0xbef00380, 0xb97302dc,
-	0x8f739973, 0xbefe03c1,
-	0x907c9973, 0x877c817c,
-	0xbf06817c, 0xbf850002,
-	0xbeff0380, 0xbf820002,
-	0xbeff03c1, 0xbf820009,
+	0xbefe03c1, 0xbeff03c1,
+	0xdc5f8000, 0x007a0000,
+	0x7e000280, 0xbefe037a,
+	0xbeff037b, 0xb97b02dc,
+	0x8f7b997b, 0xb97a3a05,
+	0x807a817a, 0xbf0d997b,
+	0xbf850002, 0x8f7a897a,
+	0xbf820001, 0x8f7a8a7a,
+	0xb97b1e06, 0x8f7b8a7b,
+	0x807a7b7a, 0x877bff7f,
+	0x0000ffff, 0x807aff7a,
+	0x00000200, 0x807a7e7a,
+	0x827b807b, 0xd7610000,
+	0x00010870, 0xd7610000,
+	0x00010a71, 0xd7610000,
+	0x00010c72, 0xd7610000,
+	0x00010e73, 0xd7610000,
+	0x00011074, 0xd7610000,
+	0x00011275, 0xd7610000,
+	0x00011476, 0xd7610000,
+	0x00011677, 0xd7610000,
+	0x00011a79, 0xd7610000,
+	0x00011c7e, 0xd7610000,
+	0x00011e7f, 0xbefe03ff,
+	0x00003fff, 0xbeff0380,
+	0xdc5f8040, 0x007a0000,
+	0xd760007a, 0x00011d00,
+	0xd760007b, 0x00011f00,
+	0xbefe037a, 0xbeff037b,
+	0xbef4037e, 0x8775ff7f,
+	0x0000ffff, 0x8875ff75,
+	0x00040000, 0xbef60380,
+	0xbef703ff, 0x10807fac,
+	0xbef1037c, 0xbef00380,
+	0xb97302dc, 0x8f739973,
+	0xbefe03c1, 0x907c9973,
+	0x877c817c, 0xbf06817c,
+	0xbf850002, 0xbeff0380,
+	0xbf820002, 0xbeff03c1,
+	0xbf820009, 0xbef603ff,
+	0x01000000, 0xe0704080,
+	0x705d0100, 0xe0704100,
+	0x705d0200, 0xe0704180,
+	0x705d0300, 0xbf820008,
 	0xbef603ff, 0x01000000,
-	0xe0704080, 0x705d0100,
-	0xe0704100, 0x705d0200,
-	0xe0704180, 0x705d0300,
-	0xbf820008, 0xbef603ff,
-	0x01000000, 0xe0704100,
-	0x705d0100, 0xe0704200,
-	0x705d0200, 0xe0704300,
-	0x705d0300, 0xb9703a05,
-	0x80708170, 0xbf0d9973,
-	0xbf850002, 0x8f708970,
-	0xbf820001, 0x8f708a70,
-	0xb97a1e06, 0x8f7a8a7a,
-	0x80707a70, 0x8070ff70,
-	0x00000200, 0xbef603ff,
-	0x01000000, 0x7e000280,
-	0x7e020280, 0x7e040280,
-	0xbefc0380, 0xd7610002,
-	0x0000f871, 0x807c817c,
-	0xd7610002, 0x0000f86c,
-	0x807c817c, 0x8a7aff6d,
-	0x80000000, 0xd7610002,
-	0x0000f87a, 0x807c817c,
-	0xd7610002, 0x0000f86e,
-	0x807c817c, 0xd7610002,
-	0x0000f86f, 0x807c817c,
-	0xd7610002, 0x0000f878,
-	0x807c817c, 0xb97af803,
-	0xd7610002, 0x0000f87a,
-	0x807c817c, 0xd7610002,
-	0x0000f87b, 0x807c817c,
-	0xb971f801, 0xd7610002,
-	0x0000f871, 0x807c817c,
-	0xb971f814, 0xd7610002,
-	0x0000f871, 0x807c817c,
-	0xb971f815, 0xd7610002,
-	0x0000f871, 0x807c817c,
-	0xbefe03ff, 0x0000ffff,
-	0xbeff0380, 0xe0704000,
-	0x705d0200, 0xbefe03c1,
+	0xe0704100, 0x705d0100,
+	0xe0704200, 0x705d0200,
+	0xe0704300, 0x705d0300,
 	0xb9703a05, 0x80708170,
 	0xbf0d9973, 0xbf850002,
 	0x8f708970, 0xbf820001,
 	0x8f708a70, 0xb97a1e06,
 	0x8f7a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
 	0xbef603ff, 0x01000000,
-	0xbef90380, 0xbefc0380,
-	0xbf800000, 0xbe802f00,
-	0xbe822f02, 0xbe842f04,
-	0xbe862f06, 0xbe882f08,
-	0xbe8a2f0a, 0xbe8c2f0c,
-	0xbe8e2f0e, 0xd7610002,
-	0x0000f200, 0x80798179,
-	0xd7610002, 0x0000f201,
+	0x7e000280, 0x7e020280,
+	0x7e040280, 0xbefc0380,
+	0xd7610002, 0x0000f871,
+	0x807c817c, 0xd7610002,
+	0x0000f86c, 0x807c817c,
+	0x8a7aff6d, 0x80000000,
+	0xd7610002, 0x0000f87a,
+	0x807c817c, 0xd7610002,
+	0x0000f86e, 0x807c817c,
+	0xd7610002, 0x0000f86f,
+	0x807c817c, 0xd7610002,
+	0x0000f878, 0x807c817c,
+	0xb97af803, 0xd7610002,
+	0x0000f87a, 0x807c817c,
+	0xd7610002, 0x0000f87b,
+	0x807c817c, 0xb971f801,
+	0xd7610002, 0x0000f871,
+	0x807c817c, 0xb971f814,
+	0xd7610002, 0x0000f871,
+	0x807c817c, 0xb971f815,
+	0xd7610002, 0x0000f871,
+	0x807c817c, 0xbefe03ff,
+	0x0000ffff, 0xbeff0380,
+	0xe0704000, 0x705d0200,
+	0xbefe03c1, 0xb9703a05,
+	0x80708170, 0xbf0d9973,
+	0xbf850002, 0x8f708970,
+	0xbf820001, 0x8f708a70,
+	0xb97a1e06, 0x8f7a8a7a,
+	0x80707a70, 0xbef603ff,
+	0x01000000, 0xbef90380,
+	0xbefc0380, 0xbf800000,
+	0xbe802f00, 0xbe822f02,
+	0xbe842f04, 0xbe862f06,
+	0xbe882f08, 0xbe8a2f0a,
+	0xbe8c2f0c, 0xbe8e2f0e,
+	0xd7610002, 0x0000f200,
 	0x80798179, 0xd7610002,
-	0x0000f202, 0x80798179,
-	0xd7610002, 0x0000f203,
+	0x0000f201, 0x80798179,
+	0xd7610002, 0x0000f202,
 	0x80798179, 0xd7610002,
-	0x0000f204, 0x80798179,
-	0xd7610002, 0x0000f205,
+	0x0000f203, 0x80798179,
+	0xd7610002, 0x0000f204,
 	0x80798179, 0xd7610002,
-	0x0000f206, 0x80798179,
-	0xd7610002, 0x0000f207,
+	0x0000f205, 0x80798179,
+	0xd7610002, 0x0000f206,
 	0x80798179, 0xd7610002,
-	0x0000f208, 0x80798179,
-	0xd7610002, 0x0000f209,
+	0x0000f207, 0x80798179,
+	0xd7610002, 0x0000f208,
 	0x80798179, 0xd7610002,
-	0x0000f20a, 0x80798179,
-	0xd7610002, 0x0000f20b,
+	0x0000f209, 0x80798179,
+	0xd7610002, 0x0000f20a,
 	0x80798179, 0xd7610002,
-	0x0000f20c, 0x80798179,
-	0xd7610002, 0x0000f20d,
+	0x0000f20b, 0x80798179,
+	0xd7610002, 0x0000f20c,
 	0x80798179, 0xd7610002,
-	0x0000f20e, 0x80798179,
-	0xd7610002, 0x0000f20f,
-	0x80798179, 0xbf06a079,
-	0xbf840006, 0xe0704000,
-	0x705d0200, 0x8070ff70,
-	0x00000080, 0xbef90380,
-	0x7e040280, 0x807c907c,
-	0xbf0aff7c, 0x00000060,
-	0xbf85ffbc, 0xbe802f00,
-	0xbe822f02, 0xbe842f04,
-	0xbe862f06, 0xbe882f08,
-	0xbe8a2f0a, 0xd7610002,
-	0x0000f200, 0x80798179,
-	0xd7610002, 0x0000f201,
+	0x0000f20d, 0x80798179,
+	0xd7610002, 0x0000f20e,
 	0x80798179, 0xd7610002,
-	0x0000f202, 0x80798179,
-	0xd7610002, 0x0000f203,
+	0x0000f20f, 0x80798179,
+	0xbf06a079, 0xbf840006,
+	0xe0704000, 0x705d0200,
+	0x8070ff70, 0x00000080,
+	0xbef90380, 0x7e040280,
+	0x807c907c, 0xbf0aff7c,
+	0x00000060, 0xbf85ffbc,
+	0xbe802f00, 0xbe822f02,
+	0xbe842f04, 0xbe862f06,
+	0xbe882f08, 0xbe8a2f0a,
+	0xd7610002, 0x0000f200,
 	0x80798179, 0xd7610002,
-	0x0000f204, 0x80798179,
-	0xd7610002, 0x0000f205,
+	0x0000f201, 0x80798179,
+	0xd7610002, 0x0000f202,
 	0x80798179, 0xd7610002,
-	0x0000f206, 0x80798179,
-	0xd7610002, 0x0000f207,
+	0x0000f203, 0x80798179,
+	0xd7610002, 0x0000f204,
 	0x80798179, 0xd7610002,
-	0x0000f208, 0x80798179,
-	0xd7610002, 0x0000f209,
+	0x0000f205, 0x80798179,
+	0xd7610002, 0x0000f206,
 	0x80798179, 0xd7610002,
-	0x0000f20a, 0x80798179,
-	0xd7610002, 0x0000f20b,
-	0x80798179, 0xe0704000,
-	0x705d0200, 0xbefe03c1,
-	0x907c9973, 0x877c817c,
-	0xbf06817c, 0xbf850002,
-	0xbeff0380, 0xbf820001,
-	0xbeff03c1, 0xb97b4306,
-	0x877bc17b, 0xbf840044,
-	0xbf8a0000, 0x877aff6d,
-	0x80000000, 0xbf840040,
-	0x8f7b867b, 0x8f7b827b,
-	0xbef6037b, 0xb9703a05,
-	0x80708170, 0xbf0d9973,
-	0xbf850002, 0x8f708970,
-	0xbf820001, 0x8f708a70,
-	0xb97a1e06, 0x8f7a8a7a,
-	0x80707a70, 0x8070ff70,
-	0x00000200, 0x8070ff70,
-	0x00000080, 0xbef603ff,
-	0x01000000, 0xd7650000,
-	0x000100c1, 0xd7660000,
-	0x000200c1, 0x16000084,
-	0x907c9973, 0x877c817c,
-	0xbf06817c, 0xbefc0380,
-	0xbf850012, 0xbe8303ff,
-	0x00000080, 0xbf800000,
-	0xbf800000, 0xbf800000,
-	0xd8d80000, 0x01000000,
-	0xbf8c0000, 0xe0704000,
-	0x705d0100, 0x807c037c,
-	0x80700370, 0xd5250000,
-	0x0001ff00, 0x00000080,
-	0xbf0a7b7c, 0xbf85fff4,
-	0xbf820011, 0xbe8303ff,
-	0x00000100, 0xbf800000,
-	0xbf800000, 0xbf800000,
-	0xd8d80000, 0x01000000,
-	0xbf8c0000, 0xe0704000,
-	0x705d0100, 0x807c037c,
-	0x80700370, 0xd5250000,
-	0x0001ff00, 0x00000100,
-	0xbf0a7b7c, 0xbf85fff4,
+	0x0000f207, 0x80798179,
+	0xd7610002, 0x0000f208,
+	0x80798179, 0xd7610002,
+	0x0000f209, 0x80798179,
+	0xd7610002, 0x0000f20a,
+	0x80798179, 0xd7610002,
+	0x0000f20b, 0x80798179,
+	0xe0704000, 0x705d0200,
 	0xbefe03c1, 0x907c9973,
 	0x877c817c, 0xbf06817c,
-	0xbf850004, 0xbef003ff,
-	0x00000200, 0xbeff0380,
-	0xbf820003, 0xbef003ff,
-	0x00000400, 0xbeff03c1,
-	0xb97b3a05, 0x807b817b,
-	0x8f7b827b, 0x907c9973,
+	0xbf850002, 0xbeff0380,
+	0xbf820001, 0xbeff03c1,
+	0xb97b4306, 0x877bc17b,
+	0xbf840044, 0xbf8a0000,
+	0x877aff6d, 0x80000000,
+	0xbf840040, 0x8f7b867b,
+	0x8f7b827b, 0xbef6037b,
+	0xb9703a05, 0x80708170,
+	0xbf0d9973, 0xbf850002,
+	0x8f708970, 0xbf820001,
+	0x8f708a70, 0xb97a1e06,
+	0x8f7a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
+	0x8070ff70, 0x00000080,
+	0xbef603ff, 0x01000000,
+	0xd7650000, 0x000100c1,
+	0xd7660000, 0x000200c1,
+	0x16000084, 0x907c9973,
 	0x877c817c, 0xbf06817c,
-	0xbf850017, 0xbef603ff,
-	0x01000000, 0xbefc0384,
-	0xbf0a7b7c, 0xbf840037,
-	0x7e008700, 0x7e028701,
-	0x7e048702, 0x7e068703,
-	0xe0704000, 0x705d0000,
-	0xe0704080, 0x705d0100,
-	0xe0704100, 0x705d0200,
-	0xe0704180, 0x705d0300,
-	0x807c847c, 0x8070ff70,
-	0x00000200, 0xbf0a7b7c,
-	0xbf85ffef, 0xbf820025,
+	0xbefc0380, 0xbf850012,
+	0xbe8303ff, 0x00000080,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf8c0000,
+	0xe0704000, 0x705d0100,
+	0x807c037c, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000080, 0xbf0a7b7c,
+	0xbf85fff4, 0xbf820011,
+	0xbe8303ff, 0x00000100,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf8c0000,
+	0xe0704000, 0x705d0100,
+	0x807c037c, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000100, 0xbf0a7b7c,
+	0xbf85fff4, 0xbefe03c1,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbf850004,
+	0xbef003ff, 0x00000200,
+	0xbeff0380, 0xbf820003,
+	0xbef003ff, 0x00000400,
+	0xbeff03c1, 0xb97b3a05,
+	0x807b817b, 0x8f7b827b,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbf850017,
 	0xbef603ff, 0x01000000,
 	0xbefc0384, 0xbf0a7b7c,
-	0xbf840011, 0x7e008700,
+	0xbf840037, 0x7e008700,
 	0x7e028701, 0x7e048702,
 	0x7e068703, 0xe0704000,
-	0x705d0000, 0xe0704100,
-	0x705d0100, 0xe0704200,
-	0x705d0200, 0xe0704300,
+	0x705d0000, 0xe0704080,
+	0x705d0100, 0xe0704100,
+	0x705d0200, 0xe0704180,
 	0x705d0300, 0x807c847c,
-	0x8070ff70, 0x00000400,
+	0x8070ff70, 0x00000200,
 	0xbf0a7b7c, 0xbf85ffef,
-	0xb97b1e06, 0x877bc17b,
-	0xbf84000c, 0x8f7b837b,
-	0x807b7c7b, 0xbefe03c1,
-	0xbeff0380, 0x7e008700,
+	0xbf820025, 0xbef603ff,
+	0x01000000, 0xbefc0384,
+	0xbf0a7b7c, 0xbf840011,
+	0x7e008700, 0x7e028701,
+	0x7e048702, 0x7e068703,
 	0xe0704000, 0x705d0000,
-	0x807c817c, 0x8070ff70,
-	0x00000080, 0xbf0a7b7c,
-	0xbf85fff8, 0xbf82013b,
-	0xbef4037e, 0x8775ff7f,
-	0x0000ffff, 0x8875ff75,
-	0x00040000, 0xbef60380,
-	0xbef703ff, 0x10807fac,
-	0xb97202dc, 0x8f729972,
-	0x876eff7f, 0x04000000,
-	0xbf840034, 0xbefe03c1,
-	0x907c9972, 0x877c817c,
-	0xbf06817c, 0xbf850002,
-	0xbeff0380, 0xbf820001,
-	0xbeff03c1, 0xb96f4306,
-	0x876fc16f, 0xbf840029,
-	0x8f6f866f, 0x8f6f826f,
-	0xbef6036f, 0xb9783a05,
-	0x80788178, 0xbf0d9972,
-	0xbf850002, 0x8f788978,
-	0xbf820001, 0x8f788a78,
-	0xb96e1e06, 0x8f6e8a6e,
-	0x80786e78, 0x8078ff78,
-	0x00000200, 0x8078ff78,
-	0x00000080, 0xbef603ff,
-	0x01000000, 0x907c9972,
-	0x877c817c, 0xbf06817c,
-	0xbefc0380, 0xbf850009,
-	0xe0310000, 0x781d0000,
-	0x807cff7c, 0x00000080,
-	0x8078ff78, 0x00000080,
-	0xbf0a6f7c, 0xbf85fff8,
-	0xbf820008, 0xe0310000,
-	0x781d0000, 0x807cff7c,
-	0x00000100, 0x8078ff78,
-	0x00000100, 0xbf0a6f7c,
-	0xbf85fff8, 0xbef80380,
+	0xe0704100, 0x705d0100,
+	0xe0704200, 0x705d0200,
+	0xe0704300, 0x705d0300,
+	0x807c847c, 0x8070ff70,
+	0x00000400, 0xbf0a7b7c,
+	0xbf85ffef, 0xb97b1e06,
+	0x877bc17b, 0xbf84000c,
+	0x8f7b837b, 0x807b7c7b,
+	0xbefe03c1, 0xbeff0380,
+	0x7e008700, 0xe0704000,
+	0x705d0000, 0x807c817c,
+	0x8070ff70, 0x00000080,
+	0xbf0a7b7c, 0xbf85fff8,
+	0xbf82013b, 0xbef4037e,
+	0x8775ff7f, 0x0000ffff,
+	0x8875ff75, 0x00040000,
+	0xbef60380, 0xbef703ff,
+	0x10807fac, 0xb97202dc,
+	0x8f729972, 0x876eff7f,
+	0x04000000, 0xbf840034,
 	0xbefe03c1, 0x907c9972,
 	0x877c817c, 0xbf06817c,
 	0xbf850002, 0xbeff0380,
 	0xbf820001, 0xbeff03c1,
-	0xb96f3a05, 0x806f816f,
-	0x8f6f826f, 0x907c9972,
-	0x877c817c, 0xbf06817c,
-	0xbf850024, 0xbef603ff,
-	0x01000000, 0xbeee0378,
+	0xb96f4306, 0x876fc16f,
+	0xbf840029, 0x8f6f866f,
+	0x8f6f826f, 0xbef6036f,
+	0xb9783a05, 0x80788178,
+	0xbf0d9972, 0xbf850002,
+	0x8f788978, 0xbf820001,
+	0x8f788a78, 0xb96e1e06,
+	0x8f6e8a6e, 0x80786e78,
 	0x8078ff78, 0x00000200,
-	0xbefc0384, 0xbf0a6f7c,
-	0xbf840050, 0xe0304000,
-	0x785d0000, 0xe0304080,
-	0x785d0100, 0xe0304100,
-	0x785d0200, 0xe0304180,
-	0x785d0300, 0xbf8c3f70,
-	0x7e008500, 0x7e028501,
-	0x7e048502, 0x7e068503,
-	0x807c847c, 0x8078ff78,
-	0x00000200, 0xbf0a6f7c,
-	0xbf85ffee, 0xe0304000,
-	0x6e5d0000, 0xe0304080,
-	0x6e5d0100, 0xe0304100,
-	0x6e5d0200, 0xe0304180,
-	0x6e5d0300, 0xbf8c3f70,
-	0xbf820034, 0xbef603ff,
-	0x01000000, 0xbeee0378,
-	0x8078ff78, 0x00000400,
-	0xbefc0384, 0xbf0a6f7c,
-	0xbf840012, 0xe0304000,
-	0x785d0000, 0xe0304100,
-	0x785d0100, 0xe0304200,
-	0x785d0200, 0xe0304300,
-	0x785d0300, 0xbf8c3f70,
-	0x7e008500, 0x7e028501,
-	0x7e048502, 0x7e068503,
-	0x807c847c, 0x8078ff78,
-	0x00000400, 0xbf0a6f7c,
-	0xbf85ffee, 0xb96f1e06,
-	0x876fc16f, 0xbf84000e,
-	0x8f6f836f, 0x806f7c6f,
-	0xbefe03c1, 0xbeff0380,
+	0x8078ff78, 0x00000080,
+	0xbef603ff, 0x01000000,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbefc0380,
+	0xbf850009, 0xe0310000,
+	0x781d0000, 0x807cff7c,
+	0x00000080, 0x8078ff78,
+	0x00000080, 0xbf0a6f7c,
+	0xbf85fff8, 0xbf820008,
+	0xe0310000, 0x781d0000,
+	0x807cff7c, 0x00000100,
+	0x8078ff78, 0x00000100,
+	0xbf0a6f7c, 0xbf85fff8,
+	0xbef80380, 0xbefe03c1,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbf850002,
+	0xbeff0380, 0xbf820001,
+	0xbeff03c1, 0xb96f3a05,
+	0x806f816f, 0x8f6f826f,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbf850024,
+	0xbef603ff, 0x01000000,
+	0xbeee0378, 0x8078ff78,
+	0x00000200, 0xbefc0384,
+	0xbf0a6f7c, 0xbf840050,
 	0xe0304000, 0x785d0000,
+	0xe0304080, 0x785d0100,
+	0xe0304100, 0x785d0200,
+	0xe0304180, 0x785d0300,
 	0xbf8c3f70, 0x7e008500,
-	0x807c817c, 0x8078ff78,
-	0x00000080, 0xbf0a6f7c,
-	0xbf85fff7, 0xbeff03c1,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807c847c,
+	0x8078ff78, 0x00000200,
+	0xbf0a6f7c, 0xbf85ffee,
 	0xe0304000, 0x6e5d0000,
-	0xe0304100, 0x6e5d0100,
-	0xe0304200, 0x6e5d0200,
-	0xe0304300, 0x6e5d0300,
-	0xbf8c3f70, 0xb9783a05,
-	0x80788178, 0xbf0d9972,
-	0xbf850002, 0x8f788978,
-	0xbf820001, 0x8f788a78,
-	0xb96e1e06, 0x8f6e8a6e,
-	0x80786e78, 0x8078ff78,
-	0x00000200, 0x80f8ff78,
-	0x00000050, 0xbef603ff,
-	0x01000000, 0xbefc03ff,
-	0x0000006c, 0x80f89078,
-	0xf429003a, 0xf0000000,
-	0xbf8cc07f, 0x80fc847c,
-	0xbf800000, 0xbe803100,
-	0xbe823102, 0x80f8a078,
-	0xf42d003a, 0xf0000000,
-	0xbf8cc07f, 0x80fc887c,
-	0xbf800000, 0xbe803100,
-	0xbe823102, 0xbe843104,
-	0xbe863106, 0x80f8c078,
-	0xf431003a, 0xf0000000,
-	0xbf8cc07f, 0x80fc907c,
-	0xbf800000, 0xbe803100,
-	0xbe823102, 0xbe843104,
-	0xbe863106, 0xbe883108,
-	0xbe8a310a, 0xbe8c310c,
-	0xbe8e310e, 0xbf06807c,
-	0xbf84fff0, 0xba80f801,
-	0x00000000, 0xbf8a0000,
+	0xe0304080, 0x6e5d0100,
+	0xe0304100, 0x6e5d0200,
+	0xe0304180, 0x6e5d0300,
+	0xbf8c3f70, 0xbf820034,
+	0xbef603ff, 0x01000000,
+	0xbeee0378, 0x8078ff78,
+	0x00000400, 0xbefc0384,
+	0xbf0a6f7c, 0xbf840012,
+	0xe0304000, 0x785d0000,
+	0xe0304100, 0x785d0100,
+	0xe0304200, 0x785d0200,
+	0xe0304300, 0x785d0300,
+	0xbf8c3f70, 0x7e008500,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807c847c,
+	0x8078ff78, 0x00000400,
+	0xbf0a6f7c, 0xbf85ffee,
+	0xb96f1e06, 0x876fc16f,
+	0xbf84000e, 0x8f6f836f,
+	0x806f7c6f, 0xbefe03c1,
+	0xbeff0380, 0xe0304000,
+	0x785d0000, 0xbf8c3f70,
+	0x7e008500, 0x807c817c,
+	0x8078ff78, 0x00000080,
+	0xbf0a6f7c, 0xbf85fff7,
+	0xbeff03c1, 0xe0304000,
+	0x6e5d0000, 0xe0304100,
+	0x6e5d0100, 0xe0304200,
+	0x6e5d0200, 0xe0304300,
+	0x6e5d0300, 0xbf8c3f70,
 	0xb9783a05, 0x80788178,
 	0xbf0d9972, 0xbf850002,
 	0x8f788978, 0xbf820001,
 	0x8f788a78, 0xb96e1e06,
 	0x8f6e8a6e, 0x80786e78,
 	0x8078ff78, 0x00000200,
+	0x80f8ff78, 0x00000050,
 	0xbef603ff, 0x01000000,
-	0xf4211bfa, 0xf0000000,
-	0x80788478, 0xf4211b3a,
+	0xbefc03ff, 0x0000006c,
+	0x80f89078, 0xf429003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc847c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0x80f8a078, 0xf42d003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc887c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0xbe843104, 0xbe863106,
+	0x80f8c078, 0xf431003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc907c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0xbe843104, 0xbe863106,
+	0xbe883108, 0xbe8a310a,
+	0xbe8c310c, 0xbe8e310e,
+	0xbf06807c, 0xbf84fff0,
+	0xba80f801, 0x00000000,
+	0xbf8a0000, 0xb9783a05,
+	0x80788178, 0xbf0d9972,
+	0xbf850002, 0x8f788978,
+	0xbf820001, 0x8f788a78,
+	0xb96e1e06, 0x8f6e8a6e,
+	0x80786e78, 0x8078ff78,
+	0x00000200, 0xbef603ff,
+	0x01000000, 0xf4211bfa,
 	0xf0000000, 0x80788478,
-	0xf4211b7a, 0xf0000000,
-	0x80788478, 0xf4211c3a,
+	0xf4211b3a, 0xf0000000,
+	0x80788478, 0xf4211b7a,
 	0xf0000000, 0x80788478,
-	0xf4211c7a, 0xf0000000,
-	0x80788478, 0xf4211eba,
+	0xf4211c3a, 0xf0000000,
+	0x80788478, 0xf4211c7a,
 	0xf0000000, 0x80788478,
-	0xf4211efa, 0xf0000000,
-	0x80788478, 0xf4211e7a,
+	0xf4211eba, 0xf0000000,
+	0x80788478, 0xf4211efa,
 	0xf0000000, 0x80788478,
-	0xf4211cfa, 0xf0000000,
-	0x80788478, 0xf4211bba,
+	0xf4211e7a, 0xf0000000,
+	0x80788478, 0xf4211cfa,
 	0xf0000000, 0x80788478,
-	0xbf8cc07f, 0xb9eef814,
 	0xf4211bba, 0xf0000000,
 	0x80788478, 0xbf8cc07f,
-	0xb9eef815, 0xbefc036f,
-	0xbefe0370, 0xbeff0371,
-	0x876f7bff, 0x000003ff,
-	0xb9ef4803, 0x876f7bff,
-	0xfffff800, 0x906f8b6f,
-	0xb9efa2c3, 0xb9f3f801,
-	0xb96e3a05, 0x806e816e,
-	0xbf0d9972, 0xbf850002,
-	0x8f6e896e, 0xbf820001,
-	0x8f6e8a6e, 0xb96f1e06,
-	0x8f6f8a6f, 0x806e6f6e,
-	0x806eff6e, 0x00000200,
-	0x806e746e, 0x826f8075,
-	0x876fff6f, 0x0000ffff,
-	0xf4091c37, 0xfa000050,
-	0xf4091d37, 0xfa000060,
-	0xf4011e77, 0xfa000074,
-	0xbf8cc07f, 0x876dff6d,
-	0x0000ffff, 0x87fe7e7e,
-	0x87ea6a6a, 0xb9faf802,
-	0xbe80226c, 0xbf810000,
+	0xb9eef814, 0xf4211bba,
+	0xf0000000, 0x80788478,
+	0xbf8cc07f, 0xb9eef815,
+	0xbefc036f, 0xbefe0370,
+	0xbeff0371, 0x876f7bff,
+	0x000003ff, 0xb9ef4803,
+	0x876f7bff, 0xfffff800,
+	0x906f8b6f, 0xb9efa2c3,
+	0xb9f3f801, 0xb96e3a05,
+	0x806e816e, 0xbf0d9972,
+	0xbf850002, 0x8f6e896e,
+	0xbf820001, 0x8f6e8a6e,
+	0xb96f1e06, 0x8f6f8a6f,
+	0x806e6f6e, 0x806eff6e,
+	0x00000200, 0x806e746e,
+	0x826f8075, 0x876fff6f,
+	0x0000ffff, 0xf4091c37,
+	0xfa000050, 0xf4091d37,
+	0xfa000060, 0xf4011e77,
+	0xfa000074, 0xbf8cc07f,
+	0x876dff6d, 0x0000ffff,
+	0x87fe7e7e, 0x87ea6a6a,
+	0xb9faf802, 0xbe80226c,
+	0xbf810000, 0xbf9f0000,
 	0xbf9f0000, 0xbf9f0000,
 	0xbf9f0000, 0xbf9f0000,
-	0xbf9f0000, 0x00000000,
 };
 
 static const uint32_t cwsr_trap_gfx11_hex[] = {
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index fdab64624422..e0140df0b0ec 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -369,6 +369,12 @@ L_SLEEP:
 	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp
 
 #if NO_SQC_STORE
+#if ASIC_FAMILY <= CHIP_SIENNA_CICHLID
+	// gfx10: If there was a VALU exception, the exception state must be
+	// cleared before executing the VALU instructions below.
+	v_clrexcp
+#endif
+
 	// Trap temporaries must be saved via VGPR but all VGPRs are in use.
 	// There is no ttmp space to hold the resource constant for VGPR save.
 	// Save v0 by itself since it requires only two SGPRs.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f6d4748c1980..ce4c52ec34d8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1564,16 +1564,11 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
 {
 	struct kfd_ioctl_import_dmabuf_args *args = data;
 	struct kfd_process_device *pdd;
-	struct dma_buf *dmabuf;
 	int idr_handle;
 	uint64_t size;
 	void *mem;
 	int r;
 
-	dmabuf = dma_buf_get(args->dmabuf_fd);
-	if (IS_ERR(dmabuf))
-		return PTR_ERR(dmabuf);
-
 	mutex_lock(&p->mutex);
 	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
 	if (!pdd) {
@@ -1587,10 +1582,10 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
 		goto err_unlock;
 	}
 
-	r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf,
-					      args->va_addr, pdd->drm_priv,
-					      (struct kgd_mem **)&mem, &size,
-					      NULL);
+	r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd,
+						 args->va_addr, pdd->drm_priv,
+						 (struct kgd_mem **)&mem, &size,
+						 NULL);
 	if (r)
 		goto err_unlock;
 
@@ -1601,7 +1596,6 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
 	}
 
 	mutex_unlock(&p->mutex);
-	dma_buf_put(dmabuf);
 
 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
 
@@ -1612,7 +1606,6 @@ err_free:
 					       pdd->drm_priv, NULL);
 err_unlock:
 	mutex_unlock(&p->mutex);
-	dma_buf_put(dmabuf);
 	return r;
 }
 
@@ -1855,8 +1848,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
 	return num_of_bos;
 }
 
-static int criu_get_prime_handle(struct kgd_mem *mem, int flags,
-				      u32 *shared_fd)
+static int criu_get_prime_handle(struct kgd_mem *mem,
+				 int flags, u32 *shared_fd)
 {
 	struct dma_buf *dmabuf;
 	int ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 0f58be65132f..739721254a5d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -880,6 +880,10 @@ static int copy_signaled_event_data(uint32_t num_events,
 				dst = &data[i].memory_exception_data;
 				src = &event->memory_exception_data;
 				size = sizeof(struct kfd_hsa_memory_exception_data);
+			} else if (event->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+				dst = &data[i].memory_exception_data;
+				src = &event->hw_exception_data;
+				size = sizeof(struct kfd_hsa_hw_exception_data);
 			} else if (event->type == KFD_EVENT_TYPE_SIGNAL &&
 				waiter->event_age_enabled) {
 				dst = &data[i].signal_event_data.last_event_age;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 62b205dac63a..6604a3f99c5e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -330,12 +330,6 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
 	pdd->gpuvm_limit =
 		pdd->dev->kfd->shared_resources.gpuvm_size - 1;
 
-	/* dGPUs: the reserved space for kernel
-	 * before SVM
-	 */
-	pdd->qpd.cwsr_base = SVM_CWSR_BASE;
-	pdd->qpd.ib_base = SVM_IB_BASE;
-
 	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
 	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
 }
@@ -345,18 +339,18 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
 	pdd->lds_base = MAKE_LDS_APP_BASE_V9();
 	pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
 
-	pdd->gpuvm_base = PAGE_SIZE;
+        /* Raven needs SVM to support graphic handle, etc. Leave the small
+         * reserved space before SVM on Raven as well, even though we don't
+         * have to.
+         * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they
+         * are used in Thunk to reserve SVM.
+         */
+        pdd->gpuvm_base = SVM_USER_BASE;
 	pdd->gpuvm_limit =
 		pdd->dev->kfd->shared_resources.gpuvm_size - 1;
 
 	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
 	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
-
-	/*
-	 * Place TBA/TMA on opposite side of VM hole to prevent
-	 * stray faults from triggering SVM on these pages.
-	 */
-	pdd->qpd.cwsr_base = pdd->dev->kfd->shared_resources.gpuvm_size;
 }
 
 int kfd_init_apertures(struct kfd_process *process)
@@ -413,6 +407,12 @@ int kfd_init_apertures(struct kfd_process *process)
 					return -EINVAL;
 				}
 			}
+
+                        /* dGPUs: the reserved space for kernel
+                         * before SVM
+                         */
+                        pdd->qpd.cwsr_base = SVM_CWSR_BASE;
+                        pdd->qpd.ib_base = SVM_IB_BASE;
 		}
 
 		dev_dbg(kfd_device, "node id %u\n", id);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6c25dab051d5..d630100b9e91 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -260,19 +260,6 @@ static void svm_migrate_put_sys_page(unsigned long addr)
 	put_page(page);
 }
 
-static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
-{
-	unsigned long cpages = 0;
-	unsigned long i;
-
-	for (i = 0; i < migrate->npages; i++) {
-		if (migrate->src[i] & MIGRATE_PFN_VALID &&
-		    migrate->src[i] & MIGRATE_PFN_MIGRATE)
-			cpages++;
-	}
-	return cpages;
-}
-
 static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
 {
 	unsigned long upages = 0;
@@ -402,6 +389,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 	struct dma_fence *mfence = NULL;
 	struct migrate_vma migrate = { 0 };
 	unsigned long cpages = 0;
+	unsigned long mpages = 0;
 	dma_addr_t *scratch;
 	void *buf;
 	int r = -ENOMEM;
@@ -442,20 +430,21 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 		goto out_free;
 	}
 	if (cpages != npages)
-		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+		pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 			 cpages, npages);
 	else
-		pr_debug("0x%lx pages migrated\n", cpages);
+		pr_debug("0x%lx pages collected\n", cpages);
 
 	r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
 	migrate_vma_pages(&migrate);
 
-	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
-		svm_migrate_successful_pages(&migrate), cpages, migrate.npages);
-
 	svm_migrate_copy_done(adev, mfence);
 	migrate_vma_finalize(&migrate);
 
+	mpages = cpages - svm_migrate_unsuccessful_pages(&migrate);
+	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+			 mpages, cpages, migrate.npages);
+
 	kfd_smi_event_migration_end(node, p->lead_thread->pid,
 				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
 				    0, node->id, trigger);
@@ -465,12 +454,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 out_free:
 	kvfree(buf);
 out:
-	if (!r && cpages) {
+	if (!r && mpages) {
 		pdd = svm_range_get_pdd_by_node(prange, node);
 		if (pdd)
-			WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);
+			WRITE_ONCE(pdd->page_in, pdd->page_in + mpages);
 
-		return cpages;
+		return mpages;
 	}
 	return r;
 }
@@ -479,6 +468,8 @@ out:
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
  * @mm: the process mm structure
  * @trigger: reason of migration
  *
@@ -489,19 +480,20 @@ out:
  */
 static int
 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+			unsigned long start_mgr, unsigned long last_mgr,
 			struct mm_struct *mm, uint32_t trigger)
 {
 	unsigned long addr, start, end;
 	struct vm_area_struct *vma;
 	uint64_t ttm_res_offset;
 	struct kfd_node *node;
-	unsigned long cpages = 0;
+	unsigned long mpages = 0;
 	long r = 0;
 
-	if (prange->actual_loc == best_loc) {
-		pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-			 prange->svms, prange->start, prange->last, best_loc);
-		return 0;
+	if (start_mgr < prange->start || last_mgr > prange->last) {
+		pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+			 start_mgr, last_mgr, prange->start, prange->last);
+		return -EFAULT;
 	}
 
 	node = svm_range_get_node_by_id(prange, best_loc);
@@ -510,18 +502,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 		return -ENODEV;
 	}
 
-	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
-		 prange->start, prange->last, best_loc);
+	pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+		prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+		best_loc);
 
-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = start_mgr << PAGE_SHIFT;
+	end = (last_mgr + 1) << PAGE_SHIFT;
 
 	r = svm_range_vram_node_new(node, prange, true);
 	if (r) {
 		dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
 		return r;
 	}
-	ttm_res_offset = prange->offset << PAGE_SHIFT;
+	ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT;
 
 	for (addr = start; addr < end;) {
 		unsigned long next;
@@ -536,16 +529,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 			pr_debug("failed %ld to migrate\n", r);
 			break;
 		} else {
-			cpages += r;
+			mpages += r;
 		}
 		ttm_res_offset += next - addr;
 		addr = next;
 	}
 
-	if (cpages) {
+	if (mpages) {
 		prange->actual_loc = best_loc;
-		svm_range_dma_unmap(prange);
-	} else {
+		prange->vram_pages += mpages;
+	} else if (!prange->actual_loc) {
+		/* if no page migrated and all pages from prange are at
+		 * sys ram drop svm_bo got from svm_range_vram_node_new
+		 */
 		svm_range_vram_node_free(prange);
 	}
 
@@ -663,9 +659,8 @@ out_oom:
  * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
  *
  * Return:
- *   0 - success with all pages migrated
  *   negative values - indicate error
- *   positive values - partial migration, number of pages not migrated
+ *   positive values or zero - number of pages got migrated
  */
 static long
 svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
@@ -676,6 +671,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
 	uint64_t npages = (end - start) >> PAGE_SHIFT;
 	unsigned long upages = npages;
 	unsigned long cpages = 0;
+	unsigned long mpages = 0;
 	struct amdgpu_device *adev = node->adev;
 	struct kfd_process_device *pdd;
 	struct dma_fence *mfence = NULL;
@@ -725,10 +721,10 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
 		goto out_free;
 	}
 	if (cpages != npages)
-		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+		pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 			 cpages, npages);
 	else
-		pr_debug("0x%lx pages migrated\n", cpages);
+		pr_debug("0x%lx pages collected\n", cpages);
 
 	r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
 				    scratch, npages);
@@ -751,17 +747,21 @@ out_free:
 	kvfree(buf);
 out:
 	if (!r && cpages) {
+		mpages = cpages - upages;
 		pdd = svm_range_get_pdd_by_node(prange, node);
 		if (pdd)
-			WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
+			WRITE_ONCE(pdd->page_out, pdd->page_out + mpages);
 	}
-	return r ? r : upages;
+
+	return r ? r : mpages;
 }
 
 /**
  * svm_migrate_vram_to_ram - migrate svm range from device to system
  * @prange: range structure
  * @mm: process mm, use current->mm if NULL
+ * @start_mgr: start page need be migrated to sys ram
+ * @last_mgr: last page need be migrated to sys ram
  * @trigger: reason of migration
  * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback
  *
@@ -771,6 +771,7 @@ out:
  * 0 - OK, otherwise error code
  */
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+			    unsigned long start_mgr, unsigned long last_mgr,
 			    uint32_t trigger, struct page *fault_page)
 {
 	struct kfd_node *node;
@@ -778,26 +779,33 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
 	unsigned long addr;
 	unsigned long start;
 	unsigned long end;
-	unsigned long upages = 0;
+	unsigned long mpages = 0;
 	long r = 0;
 
+	/* this pragne has no any vram page to migrate to sys ram */
 	if (!prange->actual_loc) {
 		pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
 			 prange->start, prange->last);
 		return 0;
 	}
 
+	if (start_mgr < prange->start || last_mgr > prange->last) {
+		pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+			 start_mgr, last_mgr, prange->start, prange->last);
+		return -EFAULT;
+	}
+
 	node = svm_range_get_node_by_id(prange, prange->actual_loc);
 	if (!node) {
 		pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc);
 		return -ENODEV;
 	}
 	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
-		 prange->svms, prange, prange->start, prange->last,
+		 prange->svms, prange, start_mgr, last_mgr,
 		 prange->actual_loc);
 
-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = start_mgr << PAGE_SHIFT;
+	end = (last_mgr + 1) << PAGE_SHIFT;
 
 	for (addr = start; addr < end;) {
 		unsigned long next;
@@ -816,14 +824,21 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
 			pr_debug("failed %ld to migrate prange %p\n", r, prange);
 			break;
 		} else {
-			upages += r;
+			mpages += r;
 		}
 		addr = next;
 	}
 
-	if (r >= 0 && !upages) {
-		svm_range_vram_node_free(prange);
-		prange->actual_loc = 0;
+	if (r >= 0) {
+		prange->vram_pages -= mpages;
+
+		/* prange does not have vram page set its actual_loc to system
+		 * and drop its svm_bo ref
+		 */
+		if (prange->vram_pages == 0 && prange->ttm_res) {
+			prange->actual_loc = 0;
+			svm_range_vram_node_free(prange);
+		}
 	}
 
 	return r < 0 ? r : 0;
@@ -833,17 +848,23 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
  * svm_migrate_vram_to_vram - migrate svm range from device to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start: start page need be migrated to sys ram
+ * @last: last page need be migrated to sys ram
  * @mm: process mm, use current->mm if NULL
  * @trigger: reason of migration
  *
  * Context: Process context, caller hold mmap read lock, svms lock, prange lock
  *
+ * migrate all vram pages in prange to sys ram, then migrate
+ * [start, last] pages from sys ram to gpu node best_loc.
+ *
  * Return:
  * 0 - OK, otherwise error code
  */
 static int
 svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
-			 struct mm_struct *mm, uint32_t trigger)
+			unsigned long start, unsigned long last,
+			struct mm_struct *mm, uint32_t trigger)
 {
 	int r, retries = 3;
 
@@ -855,7 +876,8 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
 
 	do {
-		r = svm_migrate_vram_to_ram(prange, mm, trigger, NULL);
+		r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
+					    trigger, NULL);
 		if (r)
 			return r;
 	} while (prange->actual_loc && --retries);
@@ -863,17 +885,21 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	if (prange->actual_loc)
 		return -EDEADLK;
 
-	return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
+	return svm_migrate_ram_to_vram(prange, best_loc, start, last, mm, trigger);
 }
 
 int
 svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+		    unsigned long start, unsigned long last,
 		    struct mm_struct *mm, uint32_t trigger)
 {
-	if  (!prange->actual_loc)
-		return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
+	if  (!prange->actual_loc || prange->actual_loc == best_loc)
+		return svm_migrate_ram_to_vram(prange, best_loc, start, last,
+					       mm, trigger);
+
 	else
-		return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);
+		return svm_migrate_vram_to_vram(prange, best_loc, start, last,
+						mm, trigger);
 
 }
 
@@ -889,10 +915,9 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
  */
 static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
 {
+	unsigned long start, last, size;
 	unsigned long addr = vmf->address;
 	struct svm_range_bo *svm_bo;
-	enum svm_work_list_ops op;
-	struct svm_range *parent;
 	struct svm_range *prange;
 	struct kfd_process *p;
 	struct mm_struct *mm;
@@ -929,51 +954,31 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
 
 	mutex_lock(&p->svms.lock);
 
-	prange = svm_range_from_addr(&p->svms, addr, &parent);
+	prange = svm_range_from_addr(&p->svms, addr, NULL);
 	if (!prange) {
 		pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr);
 		r = -EFAULT;
 		goto out_unlock_svms;
 	}
 
-	mutex_lock(&parent->migrate_mutex);
-	if (prange != parent)
-		mutex_lock_nested(&prange->migrate_mutex, 1);
+	mutex_lock(&prange->migrate_mutex);
 
 	if (!prange->actual_loc)
 		goto out_unlock_prange;
 
-	svm_range_lock(parent);
-	if (prange != parent)
-		mutex_lock_nested(&prange->lock, 1);
-	r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
-	if (prange != parent)
-		mutex_unlock(&prange->lock);
-	svm_range_unlock(parent);
-	if (r) {
-		pr_debug("failed %d to split range by granularity\n", r);
-		goto out_unlock_prange;
-	}
+	/* Align migration range start and size to granularity size */
+	size = 1UL << prange->granularity;
+	start = max(ALIGN_DOWN(addr, size), prange->start);
+	last = min(ALIGN(addr + 1, size) - 1, prange->last);
 
-	r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm,
-				    KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU,
-				    vmf->page);
+	r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, start, last,
+				    KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, vmf->page);
 	if (r)
 		pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n",
-			 r, prange->svms, prange, prange->start, prange->last);
-
-	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
-	if (p->xnack_enabled && parent == prange)
-		op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
-	else
-		op = SVM_OP_UPDATE_RANGE_NOTIFIER;
-	svm_range_add_list_work(&p->svms, parent, mm, op);
-	schedule_deferred_list_work(&p->svms);
+			r, prange->svms, prange, start, last);
 
 out_unlock_prange:
-	if (prange != parent)
-		mutex_unlock(&prange->migrate_mutex);
-	mutex_unlock(&parent->migrate_mutex);
+	mutex_unlock(&prange->migrate_mutex);
 out_unlock_svms:
 	mutex_unlock(&p->svms.lock);
 out_unref_process:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 487f26368164..2eebf67f9c2c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -41,9 +41,13 @@ enum MIGRATION_COPY_DIR {
 };
 
 int svm_migrate_to_vram(struct svm_range *prange,  uint32_t best_loc,
+			unsigned long start, unsigned long last,
 			struct mm_struct *mm, uint32_t trigger);
+
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+			    unsigned long start, unsigned long last,
 			    uint32_t trigger, struct page *fault_page);
+
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 9cc32f577e38..745024b31340 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -748,7 +748,6 @@ struct kfd_process_device {
 	/* VM context for GPUVM allocations */
 	struct file *drm_file;
 	void *drm_priv;
-	atomic64_t tlb_seq;
 
 	/* GPUVM allocations storage */
 	struct idr alloc_idr;
@@ -971,7 +970,7 @@ struct kfd_process {
 	struct work_struct debug_event_workarea;
 
 	/* Tracks debug per-vmid request for debug flags */
-	bool dbg_flags;
+	u32 dbg_flags;
 
 	atomic_t poison;
 	/* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */
@@ -1128,7 +1127,7 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
 	struct kfd_dev *dev = adev->kfd.dev;
 	uint32_t i;
 
-	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
+	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3))
 		return dev->nodes[0];
 
 	for (i = 0; i < dev->num_nodes; i++)
@@ -1462,7 +1461,14 @@ void kfd_signal_reset_event(struct kfd_node *dev);
 
 void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
 
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
+static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
+				 enum TLB_FLUSH_TYPE type)
+{
+	struct amdgpu_device *adev = pdd->dev->adev;
+	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+
+	amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask);
+}
 
 static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 7a33e06f5c90..71df51fcc1b0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -664,7 +664,8 @@ int kfd_process_create_wq(void)
 	if (!kfd_process_wq)
 		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
 	if (!kfd_restore_wq)
-		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
+		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq",
+							 WQ_FREEZABLE);
 
 	if (!kfd_process_wq || !kfd_restore_wq) {
 		kfd_process_destroy_wq();
@@ -1642,6 +1643,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 	struct amdgpu_fpriv *drv_priv;
 	struct amdgpu_vm *avm;
 	struct kfd_process *p;
+	struct dma_fence *ef;
 	struct kfd_node *dev;
 	int ret;
 
@@ -1661,13 +1663,13 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 
 	ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,
 						     &p->kgd_process_info,
-						     &p->ef);
+						     &ef);
 	if (ret) {
 		pr_err("Failed to create process VM object\n");
 		return ret;
 	}
+	RCU_INIT_POINTER(p->ef, ef);
 	pdd->drm_priv = drm_file->private_data;
-	atomic64_set(&pdd->tlb_seq, 0);
 
 	ret = kfd_process_device_reserve_ib_mem(pdd);
 	if (ret)
@@ -1909,6 +1911,21 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
 	return -EINVAL;
 }
 
+static int signal_eviction_fence(struct kfd_process *p)
+{
+	struct dma_fence *ef;
+	int ret;
+
+	rcu_read_lock();
+	ef = dma_fence_get_rcu_safe(&p->ef);
+	rcu_read_unlock();
+
+	ret = dma_fence_signal(ef);
+	dma_fence_put(ef);
+
+	return ret;
+}
+
 static void evict_process_worker(struct work_struct *work)
 {
 	int ret;
@@ -1921,31 +1938,46 @@ static void evict_process_worker(struct work_struct *work)
 	 * lifetime of this thread, kfd_process p will be valid
 	 */
 	p = container_of(dwork, struct kfd_process, eviction_work);
-	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
-		  "Eviction fence mismatch\n");
-
-	/* Narrow window of overlap between restore and evict work
-	 * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
-	 * unreserves KFD BOs, it is possible to evicted again. But
-	 * restore has few more steps of finish. So lets wait for any
-	 * previous restore work to complete
-	 */
-	flush_delayed_work(&p->restore_work);
 
 	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
 	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
 	if (!ret) {
-		dma_fence_signal(p->ef);
-		dma_fence_put(p->ef);
-		p->ef = NULL;
-		queue_delayed_work(kfd_restore_wq, &p->restore_work,
+		/* If another thread already signaled the eviction fence,
+		 * they are responsible stopping the queues and scheduling
+		 * the restore work.
+		 */
+		if (!signal_eviction_fence(p))
+			queue_delayed_work(kfd_restore_wq, &p->restore_work,
 				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
+		else
+			kfd_process_restore_queues(p);
 
 		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
 	} else
 		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
 }
 
+static int restore_process_helper(struct kfd_process *p)
+{
+	int ret = 0;
+
+	/* VMs may not have been acquired yet during debugging. */
+	if (p->kgd_process_info) {
+		ret = amdgpu_amdkfd_gpuvm_restore_process_bos(
+			p->kgd_process_info, &p->ef);
+		if (ret)
+			return ret;
+	}
+
+	ret = kfd_process_restore_queues(p);
+	if (!ret)
+		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
+	else
+		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
+
+	return ret;
+}
+
 static void restore_process_worker(struct work_struct *work)
 {
 	struct delayed_work *dwork;
@@ -1971,24 +2003,15 @@ static void restore_process_worker(struct work_struct *work)
 	 */
 
 	p->last_restore_timestamp = get_jiffies_64();
-	/* VMs may not have been acquired yet during debugging. */
-	if (p->kgd_process_info)
-		ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
-							     &p->ef);
+
+	ret = restore_process_helper(p);
 	if (ret) {
 		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
 			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
 		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
 				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
 		WARN(!ret, "reschedule restore work failed\n");
-		return;
 	}
-
-	ret = kfd_process_restore_queues(p);
-	if (!ret)
-		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
-	else
-		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
 }
 
 void kfd_suspend_all_processes(void)
@@ -1999,14 +2022,9 @@ void kfd_suspend_all_processes(void)
 
 	WARN(debug_evictions, "Evicting all processes");
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
-		cancel_delayed_work_sync(&p->eviction_work);
-		flush_delayed_work(&p->restore_work);
-
 		if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
 			pr_err("Failed to suspend process 0x%x\n", p->pasid);
-		dma_fence_signal(p->ef);
-		dma_fence_put(p->ef);
-		p->ef = NULL;
+		signal_eviction_fence(p);
 	}
 	srcu_read_unlock(&kfd_processes_srcu, idx);
 }
@@ -2018,7 +2036,7 @@ int kfd_resume_all_processes(void)
 	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
 
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
-		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
+		if (restore_process_helper(p)) {
 			pr_err("Restore process %d failed during resume\n",
 			       p->pasid);
 			ret = -EFAULT;
@@ -2059,36 +2077,6 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
 			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
 }
 
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
-{
-	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
-	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
-	struct kfd_node *dev = pdd->dev;
-	uint32_t xcc_mask = dev->xcc_mask;
-	int xcc = 0;
-
-	/*
-	 * It can be that we race and lose here, but that is extremely unlikely
-	 * and the worst thing which could happen is that we flush the changes
-	 * into the TLB once more which is harmless.
-	 */
-	if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
-		return;
-
-	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
-		/* Nothing to flush until a VMID is assigned, which
-		 * only happens when the first queue is created.
-		 */
-		if (pdd->qpd.vmid)
-			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
-							pdd->qpd.vmid);
-	} else {
-		for_each_inst(xcc, xcc_mask)
-			amdgpu_amdkfd_flush_gpu_tlb_pasid(
-				dev->adev, pdd->process->pasid, type, xcc);
-	}
-}
-
 /* assumes caller holds process lock. */
 int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 77649392e233..43eff221eae5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -87,6 +87,8 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
 		return;
 
 	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+	if (dev->kfd->shared_resources.enable_mes)
+		amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr);
 	pdd->already_dequeued = true;
 }
 
@@ -169,16 +171,43 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
 	return 0;
 }
 
+static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
+				     struct process_queue_node *pqn)
+{
+	struct kfd_node *dev;
+	struct kfd_process_device *pdd;
+
+	dev = pqn->q->device;
+
+	pdd = kfd_get_process_device_data(dev, pqm->process);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		return;
+	}
+
+	if (pqn->q->gws) {
+		if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+		    !dev->kfd->shared_resources.enable_mes)
+			amdgpu_amdkfd_remove_gws_from_process(
+				pqm->process->kgd_process_info, pqn->q->gws);
+		pdd->qpd.num_gws = 0;
+	}
+
+	if (dev->kfd->shared_resources.enable_mes) {
+		amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo);
+		if (pqn->q->wptr_bo)
+			amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
+	}
+}
+
 void pqm_uninit(struct process_queue_manager *pqm)
 {
 	struct process_queue_node *pqn, *next;
 
 	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
-		if (pqn->q && pqn->q->gws &&
-		    KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
-		    !pqn->q->device->kfd->shared_resources.enable_mes)
-			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
-				pqn->q->gws);
+		if (pqn->q)
+			pqm_clean_queue_resource(pqm, pqn);
+
 		kfd_procfs_del_queue(pqn->q);
 		uninit_queue(pqn->q);
 		list_del(&pqn->process_queue_list);
@@ -461,22 +490,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 				goto err_destroy_queue;
 		}
 
-		if (pqn->q->gws) {
-			if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
-			    !dev->kfd->shared_resources.enable_mes)
-				amdgpu_amdkfd_remove_gws_from_process(
-						pqm->process->kgd_process_info,
-						pqn->q->gws);
-			pdd->qpd.num_gws = 0;
-		}
-
-		if (dev->kfd->shared_resources.enable_mes) {
-			amdgpu_amdkfd_free_gtt_mem(dev->adev,
-						   pqn->q->gang_ctx_bo);
-			if (pqn->q->wptr_bo)
-				amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
-
-		}
+		pqm_clean_queue_resource(pqm, pqn);
 		uninit_queue(pqn->q);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f2f3c338fd94..ac84c4a2ca07 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -198,6 +198,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
 				     addr[i] >> PAGE_SHIFT, page_to_pfn(page));
 	}
+
 	return 0;
 }
 
@@ -349,6 +350,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	INIT_LIST_HEAD(&prange->child_list);
 	atomic_set(&prange->invalid, 0);
 	prange->validate_timestamp = 0;
+	prange->vram_pages = 0;
 	mutex_init(&prange->migrate_mutex);
 	mutex_init(&prange->lock);
 
@@ -395,6 +397,8 @@ static void svm_range_bo_release(struct kref *kref)
 			 prange->start, prange->last);
 		mutex_lock(&prange->lock);
 		prange->svm_bo = NULL;
+		/* prange should not hold vram page now */
+		WARN_ONCE(prange->actual_loc, "prange should not hold vram page");
 		mutex_unlock(&prange->lock);
 
 		spin_lock(&svm_bo->list_lock);
@@ -878,14 +882,29 @@ static void svm_range_debug_dump(struct svm_range_list *svms)
 
 static void *
 svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements,
-		     uint64_t offset)
+		     uint64_t offset, uint64_t *vram_pages)
 {
+	unsigned char *src = (unsigned char *)psrc + offset;
 	unsigned char *dst;
+	uint64_t i;
 
 	dst = kvmalloc_array(num_elements, size, GFP_KERNEL);
 	if (!dst)
 		return NULL;
-	memcpy(dst, (unsigned char *)psrc + offset, num_elements * size);
+
+	if (!vram_pages) {
+		memcpy(dst, src, num_elements * size);
+		return (void *)dst;
+	}
+
+	*vram_pages = 0;
+	for (i = 0; i < num_elements; i++) {
+		dma_addr_t *temp;
+		temp = (dma_addr_t *)dst + i;
+		*temp = *((dma_addr_t *)src + i);
+		if (*temp&SVM_RANGE_VRAM_DOMAIN)
+			(*vram_pages)++;
+	}
 
 	return (void *)dst;
 }
@@ -899,7 +918,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
 		if (!src->dma_addr[i])
 			continue;
 		dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i],
-					sizeof(*src->dma_addr[i]), src->npages, 0);
+					sizeof(*src->dma_addr[i]), src->npages, 0, NULL);
 		if (!dst->dma_addr[i])
 			return -ENOMEM;
 	}
@@ -910,7 +929,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
 static int
 svm_range_split_array(void *ppnew, void *ppold, size_t size,
 		      uint64_t old_start, uint64_t old_n,
-		      uint64_t new_start, uint64_t new_n)
+		      uint64_t new_start, uint64_t new_n, uint64_t *new_vram_pages)
 {
 	unsigned char *new, *old, *pold;
 	uint64_t d;
@@ -922,11 +941,12 @@ svm_range_split_array(void *ppnew, void *ppold, size_t size,
 		return 0;
 
 	d = (new_start - old_start) * size;
-	new = svm_range_copy_array(pold, size, new_n, d);
+	/* get dma addr array for new range and calculte its vram page number */
+	new = svm_range_copy_array(pold, size, new_n, d, new_vram_pages);
 	if (!new)
 		return -ENOMEM;
 	d = (new_start == old_start) ? new_n * size : 0;
-	old = svm_range_copy_array(pold, size, old_n, d);
+	old = svm_range_copy_array(pold, size, old_n, d, NULL);
 	if (!old) {
 		kvfree(new);
 		return -ENOMEM;
@@ -948,10 +968,13 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old,
 	for (i = 0; i < MAX_GPU_INSTANCE; i++) {
 		r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i],
 					  sizeof(*old->dma_addr[i]), old->start,
-					  npages, new->start, new->npages);
+					  npages, new->start, new->npages,
+					  old->actual_loc ? &new->vram_pages : NULL);
 		if (r)
 			return r;
 	}
+	if (old->actual_loc)
+		old->vram_pages -= new->vram_pages;
 
 	return 0;
 }
@@ -1097,7 +1120,7 @@ static int
 svm_range_split_tail(struct svm_range *prange, uint64_t new_last,
 		     struct list_head *insert_list, struct list_head *remap_list)
 {
-	struct svm_range *tail;
+	struct svm_range *tail = NULL;
 	int r = svm_range_split(prange, prange->start, new_last, &tail);
 
 	if (!r) {
@@ -1112,7 +1135,7 @@ static int
 svm_range_split_head(struct svm_range *prange, uint64_t new_start,
 		     struct list_head *insert_list, struct list_head *remap_list)
 {
-	struct svm_range *head;
+	struct svm_range *head = NULL;
 	int r = svm_range_split(prange, new_start, prange->last, &head);
 
 	if (!r) {
@@ -1135,66 +1158,6 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
 	list_add_tail(&pchild->child_list, &prange->child_list);
 }
 
-/**
- * svm_range_split_by_granularity - collect ranges within granularity boundary
- *
- * @p: the process with svms list
- * @mm: mm structure
- * @addr: the vm fault address in pages, to split the prange
- * @parent: parent range if prange is from child list
- * @prange: prange to split
- *
- * Trims @prange to be a single aligned block of prange->granularity if
- * possible. The head and tail are added to the child_list in @parent.
- *
- * Context: caller must hold mmap_read_lock and prange->lock
- *
- * Return:
- * 0 - OK, otherwise error code
- */
-int
-svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
-			       unsigned long addr, struct svm_range *parent,
-			       struct svm_range *prange)
-{
-	struct svm_range *head, *tail;
-	unsigned long start, last, size;
-	int r;
-
-	/* Align splited range start and size to granularity size, then a single
-	 * PTE will be used for whole range, this reduces the number of PTE
-	 * updated and the L1 TLB space used for translation.
-	 */
-	size = 1UL << prange->granularity;
-	start = ALIGN_DOWN(addr, size);
-	last = ALIGN(addr + 1, size) - 1;
-
-	pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n",
-		 prange->svms, prange->start, prange->last, start, last, size);
-
-	if (start > prange->start) {
-		r = svm_range_split(prange, start, prange->last, &head);
-		if (r)
-			return r;
-		svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE);
-	}
-
-	if (last < prange->last) {
-		r = svm_range_split(prange, prange->start, last, &tail);
-		if (r)
-			return r;
-		svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
-	}
-
-	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
-	if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
-		prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
-		pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
-			 prange, prange->start, prange->last,
-			 SVM_OP_ADD_RANGE_AND_MAP);
-	}
-	return 0;
-}
 static bool
 svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
 {
@@ -1529,7 +1492,7 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)
 	uint32_t gpuidx;
 	int r;
 
-	drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0);
+	drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0, 0);
 	drm_exec_until_all_locked(&ctx->exec) {
 		for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
 			pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
@@ -1614,6 +1577,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
+				      unsigned long map_start, unsigned long map_last,
 				      struct svm_range *prange, int32_t gpuidx,
 				      bool intr, bool wait, bool flush_tlb)
 {
@@ -1653,18 +1617,24 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 			if (test_bit(gpuidx, prange->bitmap_access))
 				bitmap_set(ctx->bitmap, gpuidx, 1);
 		}
+
+		/*
+		 * If prange is already mapped or with always mapped flag,
+		 * update mapping on GPUs with ACCESS attribute
+		 */
+		if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
+			if (prange->mapped_to_gpu ||
+			    prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)
+				bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+		}
 	} else {
 		bitmap_or(ctx->bitmap, prange->bitmap_access,
 			  prange->bitmap_aip, MAX_GPU_INSTANCE);
 	}
 
 	if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
-		bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
-		if (!prange->mapped_to_gpu ||
-		    bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
-			r = 0;
-			goto free_ctx;
-		}
+		r = 0;
+		goto free_ctx;
 	}
 
 	if (prange->actual_loc && !prange->ttm_res) {
@@ -1688,10 +1658,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		}
 	}
 
-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = map_start << PAGE_SHIFT;
+	end = (map_last + 1) << PAGE_SHIFT;
 	for (addr = start; !r && addr < end; ) {
 		struct hmm_range *hmm_range;
+		unsigned long map_start_vma;
+		unsigned long map_last_vma;
 		struct vm_area_struct *vma;
 		unsigned long next = 0;
 		unsigned long offset;
@@ -1719,7 +1691,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		}
 
 		if (!r) {
-			offset = (addr - start) >> PAGE_SHIFT;
+			offset = (addr >> PAGE_SHIFT) - prange->start;
 			r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
 					      hmm_range->hmm_pfns);
 			if (r)
@@ -1737,9 +1709,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 			r = -EAGAIN;
 		}
 
-		if (!r)
-			r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-						  ctx->bitmap, wait, flush_tlb);
+		if (!r) {
+			map_start_vma = max(map_start, prange->start + offset);
+			map_last_vma = min(map_last, prange->start + offset + npages - 1);
+			if (map_start_vma <= map_last_vma) {
+				offset = map_start_vma - prange->start;
+				npages = map_last_vma - map_start_vma + 1;
+				r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+							  ctx->bitmap, wait, flush_tlb);
+			}
+		}
 
 		if (!r && next == end)
 			prange->mapped_to_gpu = true;
@@ -1832,8 +1811,8 @@ static void svm_range_restore_work(struct work_struct *work)
 		 */
 		mutex_lock(&prange->migrate_mutex);
 
-		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       false, true, false);
+		r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+					       MAX_GPU_INSTANCE, false, true, false);
 		if (r)
 			pr_debug("failed %d to map 0x%lx to gpus\n", r,
 				 prange->start);
@@ -1870,7 +1849,7 @@ out_reschedule:
 	/* If validation failed, reschedule another attempt */
 	if (evicted_ranges) {
 		pr_debug("reschedule to restore svm range\n");
-		schedule_delayed_work(&svms->restore_work,
+		queue_delayed_work(system_freezable_wq, &svms->restore_work,
 			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
 
 		kfd_smi_event_queue_restore_rescheduled(mm);
@@ -1946,7 +1925,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 			pr_debug("failed to quiesce KFD\n");
 
 		pr_debug("schedule to restore svm %p ranges\n", svms);
-		schedule_delayed_work(&svms->restore_work,
+		queue_delayed_work(system_freezable_wq, &svms->restore_work,
 			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
 	} else {
 		unsigned long s, l;
@@ -2001,6 +1980,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
 	new->actual_loc = old->actual_loc;
 	new->granularity = old->granularity;
 	new->mapped_to_gpu = old->mapped_to_gpu;
+	new->vram_pages = old->vram_pages;
 	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
 	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
 
@@ -2908,6 +2888,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			uint32_t vmid, uint32_t node_id,
 			uint64_t addr, bool write_fault)
 {
+	unsigned long start, last, size;
 	struct mm_struct *mm = NULL;
 	struct svm_range_list *svms;
 	struct svm_range *prange;
@@ -3043,40 +3024,44 @@ retry_write_locked:
 	kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
 				       write_fault, timestamp);
 
-	if (prange->actual_loc != best_loc) {
+	/* Align migration range start and size to granularity size */
+	size = 1UL << prange->granularity;
+	start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
+	last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
+	if (prange->actual_loc != 0 || best_loc != 0) {
 		migration = true;
+
 		if (best_loc) {
-			r = svm_migrate_to_vram(prange, best_loc, mm,
-					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
+			r = svm_migrate_to_vram(prange, best_loc, start, last,
+					mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
 			if (r) {
 				pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
 					 r, addr);
 				/* Fallback to system memory if migration to
 				 * VRAM failed
 				 */
-				if (prange->actual_loc)
-					r = svm_migrate_vram_to_ram(prange, mm,
-					   KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
-					   NULL);
+				if (prange->actual_loc && prange->actual_loc != best_loc)
+					r = svm_migrate_vram_to_ram(prange, mm, start, last,
+						KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
 				else
 					r = 0;
 			}
 		} else {
-			r = svm_migrate_vram_to_ram(prange, mm,
-					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
-					NULL);
+			r = svm_migrate_vram_to_ram(prange, mm, start, last,
+					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
 		}
 		if (r) {
 			pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
-				 r, svms, prange->start, prange->last);
+				 r, svms, start, last);
 			goto out_unlock_range;
 		}
 	}
 
-	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+	r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+				       false, false);
 	if (r)
 		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
-			 r, svms, prange->start, prange->last);
+			 r, svms, start, last);
 
 	kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
 				     migration);
@@ -3422,18 +3407,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
 	*migrated = false;
 	best_loc = svm_range_best_prefetch_location(prange);
 
-	if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
-	    best_loc == prange->actual_loc)
+	/* when best_loc is a gpu node and same as prange->actual_loc
+	 * we still need do migration as prange->actual_loc !=0 does
+	 * not mean all pages in prange are vram. hmm migrate will pick
+	 * up right pages during migration.
+	 */
+	if ((best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) ||
+	    (best_loc == 0 && prange->actual_loc == 0))
 		return 0;
 
 	if (!best_loc) {
-		r = svm_migrate_vram_to_ram(prange, mm,
+		r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
 					KFD_MIGRATE_TRIGGER_PREFETCH, NULL);
 		*migrated = !r;
 		return r;
 	}
 
-	r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
+	r = svm_migrate_to_vram(prange, best_loc, prange->start, prange->last,
+				mm, KFD_MIGRATE_TRIGGER_PREFETCH);
 	*migrated = !r;
 
 	return r;
@@ -3488,7 +3479,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 
 		mutex_lock(&prange->migrate_mutex);
 		do {
+			/* migrate all vram pages in this prange to sys ram
+			 * after that prange->actual_loc should be zero
+			 */
 			r = svm_migrate_vram_to_ram(prange, mm,
+					prange->start, prange->last,
 					KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL);
 		} while (!r && prange->actual_loc && --retries);
 
@@ -3612,8 +3607,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 
 		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
 
-		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       true, true, flush_tlb);
+		r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+					       MAX_GPU_INSTANCE, true, true, flush_tlb);
 		if (r)
 			pr_debug("failed %d to map svm range\n", r);
 
@@ -3627,8 +3622,8 @@ out_unlock_range:
 		pr_debug("Remapping prange 0x%p [0x%lx 0x%lx]\n",
 			 prange, prange->start, prange->last);
 		mutex_lock(&prange->migrate_mutex);
-		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       true, true, prange->mapped_to_gpu);
+		r = svm_range_validate_and_map(mm,  prange->start, prange->last, prange,
+					       MAX_GPU_INSTANCE, true, true, prange->mapped_to_gpu);
 		if (r)
 			pr_debug("failed %d on remap svm range\n", r);
 		mutex_unlock(&prange->migrate_mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index c528df1d0ba2..026863a0abcd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -78,6 +78,7 @@ struct svm_work_list_item {
  * @update_list:link list node used to add to update_list
  * @mapping:    bo_va mapping structure to create and update GPU page table
  * @npages:     number of pages
+ * @vram_pages: vram pages number in this svm_range
  * @dma_addr:   dma mapping address on each GPU for system memory physical page
  * @ttm_res:    vram ttm resource map
  * @offset:     range start offset within mm_nodes
@@ -88,7 +89,9 @@ struct svm_work_list_item {
  * @flags:      flags defined as KFD_IOCTL_SVM_FLAG_*
  * @perferred_loc: perferred location, 0 for CPU, or GPU id
  * @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
- * @actual_loc: the actual location, 0 for CPU, or GPU id
+ * @actual_loc: this svm_range location. 0: all pages are from sys ram;
+ *              GPU id: this svm_range may include vram pages from GPU with
+ *              id actual_loc.
  * @granularity:migration granularity, log2 num pages
  * @invalid:    not 0 means cpu page table is invalidated
  * @validate_timestamp: system timestamp when range is validated
@@ -112,6 +115,7 @@ struct svm_range {
 	struct list_head		list;
 	struct list_head		update_list;
 	uint64_t			npages;
+	uint64_t			vram_pages;
 	dma_addr_t			*dma_addr[MAX_GPU_INSTANCE];
 	struct ttm_resource		*ttm_res;
 	uint64_t			offset;
@@ -168,9 +172,6 @@ struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange,
 int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
 			    bool clear);
 void svm_range_vram_node_free(struct svm_range *prange);
-int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
-			       unsigned long addr, struct svm_range *parent,
-			       struct svm_range *prange);
 int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			    uint32_t vmid, uint32_t node_id, uint64_t addr,
 			    bool write_fault);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 057284bf50bb..e5f7c92eebcb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1342,10 +1342,11 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g
 		num_cpu++;
 	}
 
+	if (list_empty(&kdev->io_link_props))
+		return -ENODATA;
+
 	gpu_link = list_first_entry(&kdev->io_link_props,
-					struct kfd_iolink_properties, list);
-	if (!gpu_link)
-		return -ENOMEM;
+				    struct kfd_iolink_properties, list);
 
 	for (i = 0; i < num_cpu; i++) {
 		/* CPU <--> GPU */
@@ -1423,15 +1424,17 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
 				peer->gpu->adev))
 		return ret;
 
+	if (list_empty(&kdev->io_link_props))
+		return -ENODATA;
+
 	iolink1 = list_first_entry(&kdev->io_link_props,
-							struct kfd_iolink_properties, list);
-	if (!iolink1)
-		return -ENOMEM;
+				   struct kfd_iolink_properties, list);
+
+	if (list_empty(&peer->io_link_props))
+		return -ENODATA;
 
 	iolink2 = list_first_entry(&peer->io_link_props,
-							struct kfd_iolink_properties, list);
-	if (!iolink2)
-		return -ENOMEM;
+				   struct kfd_iolink_properties, list);
 
 	props = kfd_alloc_struct(props);
 	if (!props)
@@ -1449,17 +1452,19 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
 		/* CPU->CPU  link*/
 		cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
 		if (cpu_dev) {
-			list_for_each_entry(iolink3, &cpu_dev->io_link_props, list)
-				if (iolink3->node_to == iolink2->node_to)
-					break;
-
-			props->weight += iolink3->weight;
-			props->min_latency += iolink3->min_latency;
-			props->max_latency += iolink3->max_latency;
-			props->min_bandwidth = min(props->min_bandwidth,
-							iolink3->min_bandwidth);
-			props->max_bandwidth = min(props->max_bandwidth,
-							iolink3->max_bandwidth);
+			list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) {
+				if (iolink3->node_to != iolink2->node_to)
+					continue;
+
+				props->weight += iolink3->weight;
+				props->min_latency += iolink3->min_latency;
+				props->max_latency += iolink3->max_latency;
+				props->min_bandwidth = min(props->min_bandwidth,
+							   iolink3->min_bandwidth);
+				props->max_bandwidth = min(props->max_bandwidth,
+							   iolink3->max_bandwidth);
+				break;
+			}
 		} else {
 			WARN(1, "CPU node not found");
 		}
diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile
index af17ab8027df..92a5c5efcf92 100644
--- a/drivers/gpu/drm/amd/display/Makefile
+++ b/drivers/gpu/drm/amd/display/Makefile
@@ -30,6 +30,9 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/hw
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/clk_mgr
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hwss
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/resource
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dsc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/optc
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index 8bf94920d23e..ab2a97e354da 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -25,22 +25,25 @@
 
 
 
+ifneq ($(CONFIG_DRM_AMD_DC),)
 AMDGPUDM = \
 	amdgpu_dm.o \
 	amdgpu_dm_plane.o \
 	amdgpu_dm_crtc.o \
 	amdgpu_dm_irq.o \
 	amdgpu_dm_mst_types.o \
-	amdgpu_dm_color.o
+	amdgpu_dm_color.o \
+	amdgpu_dm_services.o \
+	amdgpu_dm_helpers.o \
+	amdgpu_dm_pp_smu.o \
+	amdgpu_dm_psr.o \
+	amdgpu_dm_replay.o \
+	amdgpu_dm_wb.o
 
 ifdef CONFIG_DRM_AMD_DC_FP
 AMDGPUDM += dc_fpu.o
 endif
 
-ifneq ($(CONFIG_DRM_AMD_DC),)
-AMDGPUDM += amdgpu_dm_services.o amdgpu_dm_helpers.o amdgpu_dm_pp_smu.o amdgpu_dm_psr.o amdgpu_dm_replay.o
-endif
-
 AMDGPUDM += amdgpu_dm_hdcp.o
 
 ifneq ($(CONFIG_DEBUG_FS),)
@@ -52,3 +55,4 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc
 AMDGPU_DM = $(addprefix $(AMDDALPATH)/amdgpu_dm/,$(AMDGPUDM))
 
 AMD_DISPLAY_FILES += $(AMDGPU_DM)
+endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b8c3a9b104a4..f6575d7dee97 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -37,6 +37,7 @@
 #include "dc/dc_dmub_srv.h"
 #include "dc/dc_edid_parser.h"
 #include "dc/dc_stat.h"
+#include "dc/dc_state.h"
 #include "amdgpu_dm_trace.h"
 #include "dpcd_defs.h"
 #include "link/protocols/link_dpcd.h"
@@ -54,6 +55,7 @@
 #include "amdgpu_dm_crtc.h"
 #include "amdgpu_dm_hdcp.h"
 #include <drm/display/drm_hdcp_helper.h>
+#include "amdgpu_dm_wb.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_atombios.h"
 
@@ -65,7 +67,6 @@
 #include "amdgpu_dm_debugfs.h"
 #endif
 #include "amdgpu_dm_psr.h"
-#include "amdgpu_dm_replay.h"
 
 #include "ivsrcid/ivsrcid_vislands30.h"
 
@@ -576,6 +577,7 @@ static void dm_crtc_high_irq(void *interrupt_params)
 {
 	struct common_irq_params *irq_params = interrupt_params;
 	struct amdgpu_device *adev = irq_params->adev;
+	struct drm_writeback_job *job;
 	struct amdgpu_crtc *acrtc;
 	unsigned long flags;
 	int vrr_active;
@@ -584,6 +586,33 @@ static void dm_crtc_high_irq(void *interrupt_params)
 	if (!acrtc)
 		return;
 
+	if (acrtc->wb_pending) {
+		if (acrtc->wb_conn) {
+			spin_lock_irqsave(&acrtc->wb_conn->job_lock, flags);
+			job = list_first_entry_or_null(&acrtc->wb_conn->job_queue,
+						       struct drm_writeback_job,
+						       list_entry);
+			spin_unlock_irqrestore(&acrtc->wb_conn->job_lock, flags);
+
+			if (job) {
+				unsigned int v_total, refresh_hz;
+				struct dc_stream_state *stream = acrtc->dm_irq_params.stream;
+
+				v_total = stream->adjust.v_total_max ?
+					  stream->adjust.v_total_max : stream->timing.v_total;
+				refresh_hz = div_u64((uint64_t) stream->timing.pix_clk_100hz *
+					     100LL, (v_total * stream->timing.h_total));
+				mdelay(1000 / refresh_hz);
+
+				drm_writeback_signal_completion(acrtc->wb_conn, 0);
+				dc_stream_fc_disable_writeback(adev->dm.dc,
+							       acrtc->dm_irq_params.stream, 0);
+			}
+		} else
+			DRM_ERROR("%s: no amdgpu_crtc wb_conn\n", __func__);
+		acrtc->wb_pending = false;
+	}
+
 	vrr_active = amdgpu_dm_crtc_vrr_active_irq(acrtc);
 
 	drm_dbg_vbl(adev_to_drm(adev),
@@ -726,6 +755,10 @@ static void dmub_hpd_callback(struct amdgpu_device *adev,
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (link && aconnector->dc_link == link) {
 			if (notify->type == DMUB_NOTIFICATION_HPD)
@@ -895,8 +928,7 @@ static int dm_early_init(void *handle);
 /* Allocate memory for FBC compressed data  */
 static void amdgpu_dm_fbc_init(struct drm_connector *connector)
 {
-	struct drm_device *dev = connector->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct amdgpu_device *adev = drm_to_adev(connector->dev);
 	struct dm_compressor_info *compressor = &adev->dm.compressor;
 	struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(connector);
 	struct drm_display_mode *mode;
@@ -950,6 +982,10 @@ static int amdgpu_dm_audio_component_get_eld(struct device *kdev, int port,
 
 	drm_connector_list_iter_begin(dev, &conn_iter);
 	drm_for_each_connector_iter(connector, &conn_iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (aconnector->audio_inst != port)
 			continue;
@@ -990,8 +1026,7 @@ static int amdgpu_dm_audio_component_bind(struct device *kdev,
 static void amdgpu_dm_audio_component_unbind(struct device *kdev,
 					  struct device *hda_kdev, void *data)
 {
-	struct drm_device *dev = dev_get_drvdata(kdev);
-	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(kdev));
 	struct drm_audio_component *acomp = data;
 
 	acomp->ops = NULL;
@@ -1259,7 +1294,9 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
 	/* AGP aperture is disabled */
 	if (agp_bot > agp_top) {
 		logical_addr_low = adev->gmc.fb_start >> 18;
-		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+		if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+				       AMD_APU_IS_RENOIR |
+				       AMD_APU_IS_GREEN_SARDINE))
 			/*
 			 * Raven2 has a HW issue that it is unable to use the vram which
 			 * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
@@ -1271,7 +1308,9 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
 			logical_addr_high = adev->gmc.fb_end >> 18;
 	} else {
 		logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18;
-		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+		if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+				       AMD_APU_IS_RENOIR |
+				       AMD_APU_IS_GREEN_SARDINE))
 			/*
 			 * Raven2 has a HW issue that it is unable to use the vram which
 			 * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
@@ -1676,6 +1715,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	init_data.nbio_reg_offsets = adev->reg_offset[NBIO_HWIP][0];
 	init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0];
 
+	init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL;
+
+	/* Enable DWB for tested platforms only */
+	if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0))
+		init_data.num_virtual_links = 1;
+
 	INIT_LIST_HEAD(&adev->dm.da_list);
 
 	retrieve_dmi_info(&adev->dm);
@@ -1718,23 +1763,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	/* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */
 	adev->dm.dc->debug.ignore_cable_id = true;
 
-	/* TODO: There is a new drm mst change where the freedom of
-	 * vc_next_start_slot update is revoked/moved into drm, instead of in
-	 * driver. This forces us to make sure to get vc_next_start_slot updated
-	 * in drm function each time without considering if mst_state is active
-	 * or not. Otherwise, next time hotplug will give wrong start_slot
-	 * number. We are implementing a temporary solution to even notify drm
-	 * mst deallocation when link is no longer of MST type when uncommitting
-	 * the stream so we will have more time to work on a proper solution.
-	 * Ideally when dm_helpers_dp_mst_stop_top_mgr message is triggered, we
-	 * should notify drm to do a complete "reset" of its states and stop
-	 * calling further drm mst functions when link is no longer of an MST
-	 * type. This could happen when we unplug an MST hubs/displays. When
-	 * uncommit stream comes later after unplug, we should just reset
-	 * hardware states only.
-	 */
-	adev->dm.dc->debug.temp_mst_deallocation_sequence = true;
-
 	if (adev->dm.dc->caps.dp_hdmi21_pcon_support)
 		DRM_INFO("DP-HDMI FRL PCON supported\n");
 
@@ -2270,6 +2298,10 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev)
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (aconnector->dc_link->type == dc_connection_mst_branch &&
 		    aconnector->mst_mgr.aux) {
@@ -2398,6 +2430,10 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (aconnector->dc_link->type != dc_connection_mst_branch ||
 		    aconnector->mst_root)
@@ -2577,12 +2613,10 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
 
 	memset(del_streams, 0, sizeof(del_streams));
 
-	context = dc_create_state(dc);
+	context = dc_state_create_current_copy(dc);
 	if (context == NULL)
 		goto context_alloc_fail;
 
-	dc_resource_state_copy_construct_current(dc, context);
-
 	/* First remove from context all streams */
 	for (i = 0; i < context->stream_count; i++) {
 		struct dc_stream_state *stream = context->streams[i];
@@ -2592,12 +2626,12 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
 
 	/* Remove all planes for removed streams and then remove the streams */
 	for (i = 0; i < del_streams_count; i++) {
-		if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) {
+		if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) {
 			res = DC_FAIL_DETACH_SURFACES;
 			goto fail;
 		}
 
-		res = dc_remove_stream_from_ctx(dc, context, del_streams[i]);
+		res = dc_state_remove_stream(dc, context, del_streams[i]);
 		if (res != DC_OK)
 			goto fail;
 	}
@@ -2605,7 +2639,7 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
 	res = dc_commit_streams(dc, context->streams, context->stream_count);
 
 fail:
-	dc_release_state(context);
+	dc_state_release(context);
 
 context_alloc_fail:
 	return res;
@@ -2632,7 +2666,7 @@ static int dm_suspend(void *handle)
 
 		dc_allow_idle_optimizations(adev->dm.dc, false);
 
-		dm->cached_dc_state = dc_copy_state(dm->dc->current_state);
+		dm->cached_dc_state = dc_state_create_copy(dm->dc->current_state);
 
 		dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, false);
 
@@ -2657,11 +2691,12 @@ static int dm_suspend(void *handle)
 	hpd_rx_irq_work_suspend(dm);
 
 	dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
+	dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3);
 
 	return 0;
 }
 
-struct amdgpu_dm_connector *
+struct drm_connector *
 amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
 					     struct drm_crtc *crtc)
 {
@@ -2674,7 +2709,7 @@ amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
 		crtc_from_state = new_con_state->crtc;
 
 		if (crtc_from_state == crtc)
-			return to_amdgpu_dm_connector(connector);
+			return connector;
 	}
 
 	return NULL;
@@ -2825,7 +2860,7 @@ static int dm_resume(void *handle)
 	bool need_hotplug = false;
 
 	if (dm->dc->caps.ips_support) {
-		dc_dmub_srv_exit_low_power_state(dm->dc);
+		dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false);
 	}
 
 	if (amdgpu_in_reset(adev)) {
@@ -2852,6 +2887,7 @@ static int dm_resume(void *handle)
 		if (r)
 			DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
 
+		dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0);
 		dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
 
 		dc_resume(dm->dc);
@@ -2877,7 +2913,7 @@ static int dm_resume(void *handle)
 
 		dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, true);
 
-		dc_release_state(dm->cached_dc_state);
+		dc_state_release(dm->cached_dc_state);
 		dm->cached_dc_state = NULL;
 
 		amdgpu_dm_irq_resume_late(adev);
@@ -2887,10 +2923,9 @@ static int dm_resume(void *handle)
 		return 0;
 	}
 	/* Recreate dc_state - DC invalidates it when setting power state to S3. */
-	dc_release_state(dm_state->context);
-	dm_state->context = dc_create_state(dm->dc);
+	dc_state_release(dm_state->context);
+	dm_state->context = dc_state_create(dm->dc);
 	/* TODO: Remove dc_state->dccg, use dc->dccg directly. */
-	dc_resource_state_construct(dm->dc, dm_state->context);
 
 	/* Before powering on DC we need to re-initialize DMUB. */
 	dm_dmub_hw_resume(adev);
@@ -2902,6 +2937,7 @@ static int dm_resume(void *handle)
 	}
 
 	/* power on hardware */
+	dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0);
 	dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
 
 	/* program HPD filter */
@@ -2919,6 +2955,10 @@ static int dm_resume(void *handle)
 	/* Do detection*/
 	drm_connector_list_iter_begin(ddev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 
 		if (!aconnector->dc_link)
@@ -3492,6 +3532,9 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
 	list_for_each_entry(connector,
 			&dev->mode_config.connector_list, head)	{
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		dc_link = aconnector->dc_link;
 
@@ -3958,7 +4001,7 @@ dm_atomic_duplicate_state(struct drm_private_obj *obj)
 	old_state = to_dm_atomic_state(obj->state);
 
 	if (old_state && old_state->context)
-		new_state->context = dc_copy_state(old_state->context);
+		new_state->context = dc_state_create_copy(old_state->context);
 
 	if (!new_state->context) {
 		kfree(new_state);
@@ -3974,7 +4017,7 @@ static void dm_atomic_destroy_state(struct drm_private_obj *obj,
 	struct dm_atomic_state *dm_state = to_dm_atomic_state(state);
 
 	if (dm_state && dm_state->context)
-		dc_release_state(dm_state->context);
+		dc_state_release(dm_state->context);
 
 	kfree(dm_state);
 }
@@ -4010,14 +4053,12 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
 	if (!state)
 		return -ENOMEM;
 
-	state->context = dc_create_state(adev->dm.dc);
+	state->context = dc_state_create_current_copy(adev->dm.dc);
 	if (!state->context) {
 		kfree(state);
 		return -ENOMEM;
 	}
 
-	dc_resource_state_copy_construct_current(adev->dm.dc, state->context);
-
 	drm_atomic_private_obj_init(adev_to_drm(adev),
 				    &adev->dm.atomic_obj,
 				    &state->base,
@@ -4025,14 +4066,19 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
 
 	r = amdgpu_display_modeset_create_props(adev);
 	if (r) {
-		dc_release_state(state->context);
+		dc_state_release(state->context);
 		kfree(state);
 		return r;
 	}
 
+#ifdef AMD_PRIVATE_COLOR
+	if (amdgpu_dm_create_color_properties(adev))
+		return -ENOMEM;
+#endif
+
 	r = amdgpu_dm_audio_init(adev);
 	if (r) {
-		dc_release_state(state->context);
+		dc_state_release(state->context);
 		kfree(state);
 		return r;
 	}
@@ -4346,7 +4392,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 	enum dc_connection_type new_connection_type = dc_connection_none;
 	const struct dc_plane_cap *plane;
 	bool psr_feature_enabled = false;
-	bool replay_feature_enabled = false;
 	int max_overlay = dm->dc->caps.max_slave_planes;
 
 	dm->display_indexes_num = dm->dc->caps.max_streams;
@@ -4458,20 +4503,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 		}
 	}
 
-	if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) {
-		switch (adev->ip_versions[DCE_HWIP][0]) {
-		case IP_VERSION(3, 1, 4):
-		case IP_VERSION(3, 1, 5):
-		case IP_VERSION(3, 1, 6):
-		case IP_VERSION(3, 2, 0):
-		case IP_VERSION(3, 2, 1):
-			replay_feature_enabled = true;
-			break;
-		default:
-			replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK;
-			break;
-		}
-	}
 	/* loops over all connectors on the board */
 	for (i = 0; i < link_cnt; i++) {
 		struct dc_link *link = NULL;
@@ -4483,6 +4514,28 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 			continue;
 		}
 
+		link = dc_get_link_at_index(dm->dc, i);
+
+		if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) {
+			struct amdgpu_dm_wb_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL);
+
+			if (!wbcon) {
+				DRM_ERROR("KMS: Failed to allocate writeback connector\n");
+				continue;
+			}
+
+			if (amdgpu_dm_wb_connector_init(dm, wbcon, i)) {
+				DRM_ERROR("KMS: Failed to initialize writeback connector\n");
+				kfree(wbcon);
+				continue;
+			}
+
+			link->psr_settings.psr_feature_enabled = false;
+			link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
+
+			continue;
+		}
+
 		aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL);
 		if (!aconnector)
 			goto fail;
@@ -4501,8 +4554,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 			goto fail;
 		}
 
-		link = dc_get_link_at_index(dm->dc, i);
-
 		if (!dc_link_detect_connection_type(link, &new_connection_type))
 			DRM_ERROR("KMS: Failed to detect connector\n");
 
@@ -4520,12 +4571,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 				amdgpu_dm_update_connector_after_detect(aconnector);
 				setup_backlight_device(dm, aconnector);
 
-				/*
-				 * Disable psr if replay can be enabled
-				 */
-				if (replay_feature_enabled && amdgpu_dm_setup_replay(link, aconnector))
-					psr_feature_enabled = false;
-
 				if (psr_feature_enabled)
 					amdgpu_dm_set_psr_caps(link);
 
@@ -5107,7 +5152,9 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
 	 * Always set input transfer function, since plane state is refreshed
 	 * every time.
 	 */
-	ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state);
+	ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state,
+						plane_state,
+						dc_plane_state);
 	if (ret)
 		return ret;
 
@@ -5183,6 +5230,9 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
 	if (plane->type == DRM_PLANE_TYPE_CURSOR)
 		return;
 
+	if (new_plane_state->rotation != DRM_MODE_ROTATE_0)
+		goto ffu;
+
 	num_clips = drm_plane_get_damage_clips_count(new_plane_state);
 	clips = drm_plane_get_damage_clips(new_plane_state);
 
@@ -5509,10 +5559,13 @@ static void fill_stream_properties_from_drm_display_mode(
 {
 	struct dc_crtc_timing *timing_out = &stream->timing;
 	const struct drm_display_info *info = &connector->display_info;
-	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct amdgpu_dm_connector *aconnector = NULL;
 	struct hdmi_vendor_infoframe hv_frame;
 	struct hdmi_avi_infoframe avi_frame;
 
+	if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+		aconnector = to_amdgpu_dm_connector(connector);
+
 	memset(&hv_frame, 0, sizeof(hv_frame));
 	memset(&avi_frame, 0, sizeof(avi_frame));
 
@@ -5525,6 +5578,7 @@ static void fill_stream_properties_from_drm_display_mode(
 			&& stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
 		timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
 	else if (drm_mode_is_420_also(info, mode_in)
+			&& aconnector
 			&& aconnector->force_yuv420_output)
 		timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
 	else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR444)
@@ -5560,7 +5614,7 @@ static void fill_stream_properties_from_drm_display_mode(
 		timing_out->hdmi_vic = hv_frame.vic;
 	}
 
-	if (is_freesync_video_mode(mode_in, aconnector)) {
+	if (aconnector && is_freesync_video_mode(mode_in, aconnector)) {
 		timing_out->h_addressable = mode_in->hdisplay;
 		timing_out->h_total = mode_in->htotal;
 		timing_out->h_sync_width = mode_in->hsync_end - mode_in->hsync_start;
@@ -5681,13 +5735,13 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode,
 }
 
 static struct dc_sink *
-create_fake_sink(struct amdgpu_dm_connector *aconnector)
+create_fake_sink(struct dc_link *link)
 {
 	struct dc_sink_init_data sink_init_data = { 0 };
 	struct dc_sink *sink = NULL;
 
-	sink_init_data.link = aconnector->dc_link;
-	sink_init_data.sink_signal = aconnector->dc_link->connector_signal;
+	sink_init_data.link = link;
+	sink_init_data.sink_signal = link->connector_signal;
 
 	sink = dc_sink_create(&sink_init_data);
 	if (!sink) {
@@ -6037,14 +6091,14 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector,
 }
 
 static struct dc_stream_state *
-create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+create_stream_for_sink(struct drm_connector *connector,
 		       const struct drm_display_mode *drm_mode,
 		       const struct dm_connector_state *dm_state,
 		       const struct dc_stream_state *old_stream,
 		       int requested_bpc)
 {
+	struct amdgpu_dm_connector *aconnector = NULL;
 	struct drm_display_mode *preferred_mode = NULL;
-	struct drm_connector *drm_connector;
 	const struct drm_connector_state *con_state = &dm_state->base;
 	struct dc_stream_state *stream = NULL;
 	struct drm_display_mode mode;
@@ -6058,22 +6112,35 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN;
 	struct dsc_dec_dpcd_caps dsc_caps;
 
+	struct dc_link *link = NULL;
 	struct dc_sink *sink = NULL;
 
 	drm_mode_init(&mode, drm_mode);
 	memset(&saved_mode, 0, sizeof(saved_mode));
 
-	if (aconnector == NULL) {
-		DRM_ERROR("aconnector is NULL!\n");
+	if (connector == NULL) {
+		DRM_ERROR("connector is NULL!\n");
 		return stream;
 	}
 
-	drm_connector = &aconnector->base;
+	if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) {
+		aconnector = NULL;
+		aconnector = to_amdgpu_dm_connector(connector);
+		link = aconnector->dc_link;
+	} else {
+		struct drm_writeback_connector *wbcon = NULL;
+		struct amdgpu_dm_wb_connector *dm_wbcon = NULL;
+
+		wbcon = drm_connector_to_writeback(connector);
+		dm_wbcon = to_amdgpu_dm_wb_connector(wbcon);
+		link = dm_wbcon->link;
+	}
 
-	if (!aconnector->dc_sink) {
-		sink = create_fake_sink(aconnector);
+	if (!aconnector || !aconnector->dc_sink) {
+		sink = create_fake_sink(link);
 		if (!sink)
 			return stream;
+
 	} else {
 		sink = aconnector->dc_sink;
 		dc_sink_retain(sink);
@@ -6086,12 +6153,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 		goto finish;
 	}
 
+	/* We leave this NULL for writeback connectors */
 	stream->dm_stream_context = aconnector;
 
 	stream->timing.flags.LTE_340MCSC_SCRAMBLE =
-		drm_connector->display_info.hdmi.scdc.scrambling.low_rates;
+		connector->display_info.hdmi.scdc.scrambling.low_rates;
 
-	list_for_each_entry(preferred_mode, &aconnector->base.modes, head) {
+	list_for_each_entry(preferred_mode, &connector->modes, head) {
 		/* Search for preferred mode */
 		if (preferred_mode->type & DRM_MODE_TYPE_PREFERRED) {
 			native_mode_found = true;
@@ -6100,7 +6168,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	}
 	if (!native_mode_found)
 		preferred_mode = list_first_entry_or_null(
-				&aconnector->base.modes,
+				&connector->modes,
 				struct drm_display_mode,
 				head);
 
@@ -6114,7 +6182,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 		 * and the modelist may not be filled in time.
 		 */
 		DRM_DEBUG_DRIVER("No preferred mode found\n");
-	} else {
+	} else if (aconnector) {
 		recalculate_timing = is_freesync_video_mode(&mode, aconnector);
 		if (recalculate_timing) {
 			freesync_mode = get_highest_refresh_rate_mode(aconnector, false);
@@ -6137,13 +6205,17 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	 */
 	if (!scale || mode_refresh != preferred_refresh)
 		fill_stream_properties_from_drm_display_mode(
-			stream, &mode, &aconnector->base, con_state, NULL,
+			stream, &mode, connector, con_state, NULL,
 			requested_bpc);
 	else
 		fill_stream_properties_from_drm_display_mode(
-			stream, &mode, &aconnector->base, con_state, old_stream,
+			stream, &mode, connector, con_state, old_stream,
 			requested_bpc);
 
+	/* The rest isn't needed for writeback connectors */
+	if (!aconnector)
+		goto finish;
+
 	if (aconnector->timing_changed) {
 		drm_dbg(aconnector->base.dev,
 			"overriding timing for automated test, bpc %d, changing to %d\n",
@@ -6161,15 +6233,16 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
 	fill_audio_info(
 		&stream->audio_info,
-		drm_connector,
+		connector,
 		sink);
 
 	update_stream_signal(stream, sink);
 
 	if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
 		mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket);
-
-	if (stream->link->psr_settings.psr_feature_enabled || stream->link->replay_settings.replay_feature_enabled) {
+	else if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+			 stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+			 stream->signal == SIGNAL_TYPE_EDP) {
 		//
 		// should decide stream support vsc sdp colorimetry capability
 		// before building vsc info packet
@@ -6185,8 +6258,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 		if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
 			tf = TRANSFER_FUNC_GAMMA_22;
 		mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
-		aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
 
+		if (stream->link->psr_settings.psr_feature_enabled)
+			aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
 	}
 finish:
 	dc_sink_release(sink);
@@ -6268,7 +6342,7 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector,
 		dm_new_state->underscan_enable = val;
 		ret = 0;
 	} else if (property == adev->mode_info.abm_level_property) {
-		dm_new_state->abm_level = val;
+		dm_new_state->abm_level = val ?: ABM_LEVEL_IMMEDIATE_DISABLE;
 		ret = 0;
 	}
 
@@ -6313,7 +6387,8 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector,
 		*val = dm_state->underscan_enable;
 		ret = 0;
 	} else if (property == adev->mode_info.abm_level_property) {
-		*val = dm_state->abm_level;
+		*val = (dm_state->abm_level != ABM_LEVEL_IMMEDIATE_DISABLE) ?
+			dm_state->abm_level : 0;
 		ret = 0;
 	}
 
@@ -6386,7 +6461,8 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector)
 		state->pbn = 0;
 
 		if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
-			state->abm_level = amdgpu_dm_abm_level;
+			state->abm_level = amdgpu_dm_abm_level ?:
+				ABM_LEVEL_IMMEDIATE_DISABLE;
 
 		__drm_atomic_helper_connector_reset(connector, &state->base);
 	}
@@ -6564,7 +6640,7 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc,
 	if (!dc_plane_state)
 		goto cleanup;
 
-	dc_state = dc_create_state(dc);
+	dc_state = dc_state_create(dc);
 	if (!dc_state)
 		goto cleanup;
 
@@ -6591,9 +6667,9 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc,
 		dc_result = dc_validate_plane(dc, dc_plane_state);
 
 	if (dc_result == DC_OK)
-		dc_result = dc_add_stream_to_ctx(dc, dc_state, stream);
+		dc_result = dc_state_add_stream(dc, dc_state, stream);
 
-	if (dc_result == DC_OK && !dc_add_plane_to_context(
+	if (dc_result == DC_OK && !dc_state_add_plane(
 						dc,
 						stream,
 						dc_plane_state,
@@ -6605,7 +6681,7 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc,
 
 cleanup:
 	if (dc_state)
-		dc_release_state(dc_state);
+		dc_state_release(dc_state);
 
 	if (dc_plane_state)
 		dc_plane_state_release(dc_plane_state);
@@ -6627,7 +6703,7 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	enum dc_status dc_result = DC_OK;
 
 	do {
-		stream = create_stream_for_sink(aconnector, drm_mode,
+		stream = create_stream_for_sink(connector, drm_mode,
 						dm_state, old_stream,
 						requested_bpc);
 		if (stream == NULL) {
@@ -6635,6 +6711,9 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 			break;
 		}
 
+		if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			return stream;
+
 		dc_result = dc_validate_stream(adev->dm.dc, stream);
 		if (dc_result == DC_OK && stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 			dc_result = dm_dp_mst_is_port_support_mode(aconnector, stream);
@@ -6910,8 +6989,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
 	if (IS_ERR(mst_state))
 		return PTR_ERR(mst_state);
 
-	if (!mst_state->pbn_div.full)
-		mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link));
+	mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link));
 
 	if (!state->duplicated) {
 		int max_bpc = conn_state->max_requested_bpc;
@@ -6955,6 +7033,9 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
 
 	for_each_new_connector_in_state(state, connector, new_con_state, i) {
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 
 		if (!aconnector->mst_output_port)
@@ -7560,6 +7641,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
 	struct dc_link *link = dc_get_link_at_index(dc, link_index);
 	struct amdgpu_i2c_adapter *i2c;
 
+	/* Not needed for writeback connector */
 	link->priv = aconnector;
 
 
@@ -8170,6 +8252,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 			bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction;
 			bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func;
 			bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix;
+			bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult;
+			bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func;
+			bundle->surface_updates[planes_count].lut3d_func = dc_plane->lut3d_func;
+			bundle->surface_updates[planes_count].blend_tf = dc_plane->blend_tf;
 		}
 
 		amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
@@ -8381,6 +8467,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 				&acrtc_state->stream->csc_color_matrix;
 			bundle->stream_update.out_transfer_func =
 				acrtc_state->stream->out_transfer_func;
+			bundle->stream_update.lut3d_func =
+				(struct dc_3dlut *) acrtc_state->stream->lut3d_func;
+			bundle->stream_update.func_shaper =
+				(struct dc_transfer_func *) acrtc_state->stream->func_shaper;
 		}
 
 		acrtc_state->stream->abm_level = acrtc_state->abm_level;
@@ -8514,6 +8604,9 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev,
 		if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
 			continue;
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 notify:
 		aconnector = to_amdgpu_dm_connector(connector);
 
@@ -8547,6 +8640,9 @@ notify:
 		if (!status)
 			continue;
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 
 		mutex_lock(&adev->dm.audio_lock);
@@ -8572,6 +8668,12 @@ static void amdgpu_dm_crtc_copy_transient_flags(struct drm_crtc_state *crtc_stat
 	stream_state->mode_changed = drm_atomic_crtc_needs_modeset(crtc_state);
 }
 
+static void dm_clear_writeback(struct amdgpu_display_manager *dm,
+			      struct dm_crtc_state *crtc_state)
+{
+	dc_stream_remove_writeback(dm->dc, crtc_state->stream, 0);
+}
+
 static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 					struct dc_state *dc_state)
 {
@@ -8581,9 +8683,38 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
 	struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
+	struct drm_connector_state *old_con_state;
+	struct drm_connector *connector;
 	bool mode_set_reset_required = false;
 	u32 i;
 
+	/* Disable writeback */
+	for_each_old_connector_in_state(state, connector, old_con_state, i) {
+		struct dm_connector_state *dm_old_con_state;
+		struct amdgpu_crtc *acrtc;
+
+		if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		old_crtc_state = NULL;
+
+		dm_old_con_state = to_dm_connector_state(old_con_state);
+		if (!dm_old_con_state->base.crtc)
+			continue;
+
+		acrtc = to_amdgpu_crtc(dm_old_con_state->base.crtc);
+		if (acrtc)
+			old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base);
+
+		if (!acrtc->wb_enabled)
+			continue;
+
+		dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
+
+		dm_clear_writeback(dm, dm_old_crtc_state);
+		acrtc->wb_enabled = false;
+	}
+
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
 				      new_crtc_state, i) {
 		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
@@ -8708,7 +8839,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 					dc_stream_get_status(dm_new_crtc_state->stream);
 
 			if (!status)
-				status = dc_stream_get_status_from_state(dc_state,
+				status = dc_state_get_stream_status(dc_state,
 									 dm_new_crtc_state->stream);
 			if (!status)
 				drm_err(dev,
@@ -8720,6 +8851,105 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 	}
 }
 
+static void dm_set_writeback(struct amdgpu_display_manager *dm,
+			      struct dm_crtc_state *crtc_state,
+			      struct drm_connector *connector,
+			      struct drm_connector_state *new_con_state)
+{
+	struct drm_writeback_connector *wb_conn = drm_connector_to_writeback(connector);
+	struct amdgpu_device *adev = dm->adev;
+	struct amdgpu_crtc *acrtc;
+	struct dc_writeback_info *wb_info;
+	struct pipe_ctx *pipe = NULL;
+	struct amdgpu_framebuffer *afb;
+	int i = 0;
+
+	wb_info = kzalloc(sizeof(*wb_info), GFP_KERNEL);
+	if (!wb_info) {
+		DRM_ERROR("Failed to allocate wb_info\n");
+		return;
+	}
+
+	acrtc = to_amdgpu_crtc(wb_conn->encoder.crtc);
+	if (!acrtc) {
+		DRM_ERROR("no amdgpu_crtc found\n");
+		kfree(wb_info);
+		return;
+	}
+
+	afb = to_amdgpu_framebuffer(new_con_state->writeback_job->fb);
+	if (!afb) {
+		DRM_ERROR("No amdgpu_framebuffer found\n");
+		kfree(wb_info);
+		return;
+	}
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		if (dm->dc->current_state->res_ctx.pipe_ctx[i].stream == crtc_state->stream) {
+			pipe = &dm->dc->current_state->res_ctx.pipe_ctx[i];
+			break;
+		}
+	}
+
+	/* fill in wb_info */
+	wb_info->wb_enabled = true;
+
+	wb_info->dwb_pipe_inst = 0;
+	wb_info->dwb_params.dwbscl_black_color = 0;
+	wb_info->dwb_params.hdr_mult = 0x1F000;
+	wb_info->dwb_params.csc_params.gamut_adjust_type = CM_GAMUT_ADJUST_TYPE_BYPASS;
+	wb_info->dwb_params.csc_params.gamut_coef_format = CM_GAMUT_REMAP_COEF_FORMAT_S2_13;
+	wb_info->dwb_params.output_depth = DWB_OUTPUT_PIXEL_DEPTH_10BPC;
+	wb_info->dwb_params.cnv_params.cnv_out_bpc = DWB_CNV_OUT_BPC_10BPC;
+
+	/* width & height from crtc */
+	wb_info->dwb_params.cnv_params.src_width = acrtc->base.mode.crtc_hdisplay;
+	wb_info->dwb_params.cnv_params.src_height = acrtc->base.mode.crtc_vdisplay;
+	wb_info->dwb_params.dest_width = acrtc->base.mode.crtc_hdisplay;
+	wb_info->dwb_params.dest_height = acrtc->base.mode.crtc_vdisplay;
+
+	wb_info->dwb_params.cnv_params.crop_en = false;
+	wb_info->dwb_params.stereo_params.stereo_enabled = false;
+
+	wb_info->dwb_params.cnv_params.out_max_pix_val = 0x3ff;	// 10 bits
+	wb_info->dwb_params.cnv_params.out_min_pix_val = 0;
+	wb_info->dwb_params.cnv_params.fc_out_format = DWB_OUT_FORMAT_32BPP_ARGB;
+	wb_info->dwb_params.cnv_params.out_denorm_mode = DWB_OUT_DENORM_BYPASS;
+
+	wb_info->dwb_params.out_format = dwb_scaler_mode_bypass444;
+
+	wb_info->dwb_params.capture_rate = dwb_capture_rate_0;
+
+	wb_info->dwb_params.scaler_taps.h_taps = 4;
+	wb_info->dwb_params.scaler_taps.v_taps = 4;
+	wb_info->dwb_params.scaler_taps.h_taps_c = 2;
+	wb_info->dwb_params.scaler_taps.v_taps_c = 2;
+	wb_info->dwb_params.subsample_position = DWB_INTERSTITIAL_SUBSAMPLING;
+
+	wb_info->mcif_buf_params.luma_pitch = afb->base.pitches[0];
+	wb_info->mcif_buf_params.chroma_pitch = afb->base.pitches[1];
+
+	for (i = 0; i < DWB_MCIF_BUF_COUNT; i++) {
+		wb_info->mcif_buf_params.luma_address[i] = afb->address;
+		wb_info->mcif_buf_params.chroma_address[i] = 0;
+	}
+
+	wb_info->mcif_buf_params.p_vmid = 1;
+	if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) {
+		wb_info->mcif_warmup_params.start_address.quad_part = afb->address;
+		wb_info->mcif_warmup_params.region_size =
+			wb_info->mcif_buf_params.luma_pitch * wb_info->dwb_params.dest_height;
+	}
+	wb_info->mcif_warmup_params.p_vmid = 1;
+	wb_info->writeback_source_plane = pipe->plane_state;
+
+	dc_stream_add_writeback(dm->dc, crtc_state->stream, wb_info);
+
+	acrtc->wb_pending = true;
+	acrtc->wb_conn = wb_conn;
+	drm_writeback_queue_job(wb_conn, new_con_state);
+}
+
 /**
  * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation.
  * @state: The atomic state to commit
@@ -8752,7 +8982,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 			if (new_con_state->crtc &&
 				new_con_state->crtc->state->active &&
 				drm_atomic_crtc_needs_modeset(new_con_state->crtc->state)) {
-				dc_dmub_srv_exit_low_power_state(dm->dc);
+				dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false);
 				break;
 			}
 		}
@@ -8770,7 +9000,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 	for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
 		struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
 		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
-		struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+		struct amdgpu_dm_connector *aconnector;
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		aconnector = to_amdgpu_dm_connector(connector);
 
 		if (!adev->dm.hdcp_workqueue)
 			continue;
@@ -9047,6 +9282,31 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 			amdgpu_dm_commit_planes(state, dev, dm, crtc, wait_for_vblank);
 	}
 
+	/* Enable writeback */
+	for_each_new_connector_in_state(state, connector, new_con_state, i) {
+		struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
+		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
+
+		if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		if (!new_con_state->writeback_job)
+			continue;
+
+		new_crtc_state = NULL;
+
+		if (acrtc)
+			new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
+
+		if (acrtc->wb_enabled)
+			continue;
+
+		dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+
+		dm_set_writeback(dm, dm_new_crtc_state, connector, new_con_state);
+		acrtc->wb_enabled = true;
+	}
+
 	/* Update audio instances for each connector. */
 	amdgpu_dm_commit_audio(dev, state);
 
@@ -9164,10 +9424,15 @@ out:
 void dm_restore_drm_connector_state(struct drm_device *dev,
 				    struct drm_connector *connector)
 {
-	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct amdgpu_dm_connector *aconnector;
 	struct amdgpu_crtc *disconnected_acrtc;
 	struct dm_crtc_state *acrtc_state;
 
+	if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+		return;
+
+	aconnector = to_amdgpu_dm_connector(connector);
+
 	if (!aconnector->dc_sink || !connector->state || !connector->encoder)
 		return;
 
@@ -9244,12 +9509,16 @@ static void get_freesync_config_for_crtc(
 	struct dm_connector_state *new_con_state)
 {
 	struct mod_freesync_config config = {0};
-	struct amdgpu_dm_connector *aconnector =
-			to_amdgpu_dm_connector(new_con_state->base.connector);
+	struct amdgpu_dm_connector *aconnector;
 	struct drm_display_mode *mode = &new_crtc_state->base.mode;
 	int vrefresh = drm_mode_vrefresh(mode);
 	bool fs_vid_mode = false;
 
+	if (new_con_state->base.connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+		return;
+
+	aconnector = to_amdgpu_dm_connector(new_con_state->base.connector);
+
 	new_crtc_state->vrr_supported = new_con_state->freesync_capable &&
 					vrefresh >= aconnector->min_vfreq &&
 					vrefresh <= aconnector->max_vfreq;
@@ -9349,6 +9618,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 	 * update changed items
 	 */
 	struct amdgpu_crtc *acrtc = NULL;
+	struct drm_connector *connector = NULL;
 	struct amdgpu_dm_connector *aconnector = NULL;
 	struct drm_connector_state *drm_new_conn_state = NULL, *drm_old_conn_state = NULL;
 	struct dm_connector_state *dm_new_conn_state = NULL, *dm_old_conn_state = NULL;
@@ -9358,15 +9628,17 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 	dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
 	dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
 	acrtc = to_amdgpu_crtc(crtc);
-	aconnector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);
+	connector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);
+	if (connector)
+		aconnector = to_amdgpu_dm_connector(connector);
 
 	/* TODO This hack should go away */
-	if (aconnector && enable) {
+	if (connector && enable) {
 		/* Make sure fake sink is created in plug-in scenario */
 		drm_new_conn_state = drm_atomic_get_new_connector_state(state,
-							    &aconnector->base);
+									connector);
 		drm_old_conn_state = drm_atomic_get_old_connector_state(state,
-							    &aconnector->base);
+									connector);
 
 		if (IS_ERR(drm_new_conn_state)) {
 			ret = PTR_ERR_OR_ZERO(drm_new_conn_state);
@@ -9492,7 +9764,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 				crtc->base.id);
 
 		/* i.e. reset mode */
-		if (dc_remove_stream_from_ctx(
+		if (dc_state_remove_stream(
 				dm->dc,
 				dm_state->context,
 				dm_old_crtc_state->stream) != DC_OK) {
@@ -9513,7 +9785,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 		 * added MST connectors not found in existing crtc_state in the chained mode
 		 * TODO: need to dig out the root cause of that
 		 */
-		if (!aconnector)
+		if (!connector)
 			goto skip_modeset;
 
 		if (modereset_required(new_crtc_state))
@@ -9535,7 +9807,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 			DRM_DEBUG_ATOMIC("Enabling DRM crtc: %d\n",
 					 crtc->base.id);
 
-			if (dc_add_stream_to_ctx(
+			if (dc_state_add_stream(
 					dm->dc,
 					dm_state->context,
 					dm_new_crtc_state->stream) != DC_OK) {
@@ -9556,7 +9828,7 @@ skip_modeset:
 	 * We want to do dc stream updates that do not require a
 	 * full modeset below.
 	 */
-	if (!(enable && aconnector && new_crtc_state->active))
+	if (!(enable && connector && new_crtc_state->active))
 		return 0;
 	/*
 	 * Given above conditions, the dc state cannot be NULL because:
@@ -9582,6 +9854,7 @@ skip_modeset:
 	 * when a modeset is needed, to ensure it gets reprogrammed.
 	 */
 	if (dm_new_crtc_state->base.color_mgmt_changed ||
+	    dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf ||
 	    drm_atomic_crtc_needs_modeset(new_crtc_state)) {
 		ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state);
 		if (ret)
@@ -9615,7 +9888,8 @@ static bool should_reset_plane(struct drm_atomic_state *state,
 	 * TODO: Remove this hack for all asics once it proves that the
 	 * fast updates works fine on DCN3.2+.
 	 */
-	if (adev->ip_versions[DCE_HWIP][0] < IP_VERSION(3, 2, 0) && state->allow_modeset)
+	if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 2, 0) &&
+	    state->allow_modeset)
 		return true;
 
 	/* Exit early if we know that we're adding or removing the plane. */
@@ -9649,6 +9923,10 @@ static bool should_reset_plane(struct drm_atomic_state *state,
 	 */
 	for_each_oldnew_plane_in_state(state, other, old_other_state, new_other_state, i) {
 		struct amdgpu_framebuffer *old_afb, *new_afb;
+		struct dm_plane_state *dm_new_other_state, *dm_old_other_state;
+
+		dm_new_other_state = to_dm_plane_state(new_other_state);
+		dm_old_other_state = to_dm_plane_state(old_other_state);
 
 		if (other->type == DRM_PLANE_TYPE_CURSOR)
 			continue;
@@ -9685,6 +9963,18 @@ static bool should_reset_plane(struct drm_atomic_state *state,
 		    old_other_state->color_encoding != new_other_state->color_encoding)
 			return true;
 
+		/* HDR/Transfer Function changes. */
+		if (dm_old_other_state->degamma_tf != dm_new_other_state->degamma_tf ||
+		    dm_old_other_state->degamma_lut != dm_new_other_state->degamma_lut ||
+		    dm_old_other_state->hdr_mult != dm_new_other_state->hdr_mult ||
+		    dm_old_other_state->ctm != dm_new_other_state->ctm ||
+		    dm_old_other_state->shaper_lut != dm_new_other_state->shaper_lut ||
+		    dm_old_other_state->shaper_tf != dm_new_other_state->shaper_tf ||
+		    dm_old_other_state->lut3d != dm_new_other_state->lut3d ||
+		    dm_old_other_state->blend_lut != dm_new_other_state->blend_lut ||
+		    dm_old_other_state->blend_tf != dm_new_other_state->blend_tf)
+			return true;
+
 		/* Framebuffer checks fall at the end. */
 		if (!old_other_state->fb || !new_other_state->fb)
 			continue;
@@ -9839,7 +10129,7 @@ static int dm_update_plane_state(struct dc *dc,
 		if (ret)
 			return ret;
 
-		if (!dc_remove_plane_from_context(
+		if (!dc_state_remove_plane(
 				dc,
 				dm_old_crtc_state->stream,
 				dm_old_plane_state->dc_state,
@@ -9917,7 +10207,7 @@ static int dm_update_plane_state(struct dc *dc,
 		 * state. It'll be released when the atomic state is
 		 * cleaned.
 		 */
-		if (!dc_add_plane_to_context(
+		if (!dc_state_add_plane(
 				dc,
 				dm_new_crtc_state->stream,
 				dc_new_plane_state,
@@ -10079,6 +10369,9 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm
 		if (conn_state->crtc != crtc)
 			continue;
 
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		aconnector = to_amdgpu_dm_connector(connector);
 		if (!aconnector->mst_output_port || !aconnector->mst_root)
 			aconnector = NULL;
@@ -10460,7 +10753,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 			DRM_DEBUG_DRIVER("drm_dp_mst_atomic_check() failed\n");
 			goto fail;
 		}
-		status = dc_validate_global_state(dc, dm_state->context, true);
+		status = dc_validate_global_state(dc, dm_state->context, false);
 		if (status != DC_OK) {
 			DRM_DEBUG_DRIVER("DC global validation failure: %s (%d)",
 				       dc_status_to_str(status), status);
@@ -10598,7 +10891,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm,
 	input->cea_total_length = total_length;
 	memcpy(input->payload, data, length);
 
-	res = dm_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
+	res = dc_wake_and_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
 	if (!res) {
 		DRM_ERROR("EDID CEA parser failed\n");
 		return false;
@@ -10789,8 +11082,7 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
 	struct dm_connector_state *dm_con_state = NULL;
 	struct dc_sink *sink;
 
-	struct drm_device *dev = connector->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct amdgpu_device *adev = drm_to_adev(connector->dev);
 	struct amdgpu_hdmi_vsdb_info vsdb_info = {0};
 	bool freesync_capable = false;
 	enum adaptive_sync_type as_type = ADAPTIVE_SYNC_TYPE_NONE;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 3d480be802cb..9c1871b866cc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -32,6 +32,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_plane.h>
 #include "link_service_types.h"
+#include <drm/drm_writeback.h>
 
 /*
  * This file contains the definition for amdgpu_display_manager
@@ -54,6 +55,9 @@
 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A
 #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40
 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3 0x3
+
+#define AMDGPU_HDR_MULT_DEFAULT (0x100000000LL)
+
 /*
 #include "include/amdgpu_dal_power_if.h"
 #include "amdgpu_dm_irq.h"
@@ -714,11 +718,107 @@ static inline void amdgpu_dm_set_mst_status(uint8_t *status,
 
 #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
 
+struct amdgpu_dm_wb_connector {
+	struct drm_writeback_connector base;
+	struct dc_link *link;
+};
+
+#define to_amdgpu_dm_wb_connector(x) container_of(x, struct amdgpu_dm_wb_connector, base)
+
 extern const struct amdgpu_ip_block_version dm_ip_block;
 
+/* enum amdgpu_transfer_function: pre-defined transfer function supported by AMD.
+ *
+ * It includes standardized transfer functions and pure power functions. The
+ * transfer function coefficients are available at modules/color/color_gamma.c
+ */
+enum amdgpu_transfer_function {
+	AMDGPU_TRANSFER_FUNCTION_DEFAULT,
+	AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF,
+	AMDGPU_TRANSFER_FUNCTION_PQ_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_IDENTITY,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_BT709_OETF,
+	AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF,
+	AMDGPU_TRANSFER_FUNCTION_COUNT
+};
+
 struct dm_plane_state {
 	struct drm_plane_state base;
 	struct dc_plane_state *dc_state;
+
+	/* Plane color mgmt */
+	/**
+	 * @degamma_lut:
+	 *
+	 * 1D LUT for mapping framebuffer/plane pixel data before sampling or
+	 * blending operations. It's usually applied to linearize input space.
+	 * The blob (if not NULL) is an array of &struct drm_color_lut.
+	 */
+	struct drm_property_blob *degamma_lut;
+	/**
+	 * @degamma_tf:
+	 *
+	 * Predefined transfer function to tell DC driver the input space to
+	 * linearize.
+	 */
+	enum amdgpu_transfer_function degamma_tf;
+	/**
+	 * @hdr_mult:
+	 *
+	 * Multiplier to 'gain' the plane.  When PQ is decoded using the fixed
+	 * func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on
+	 * AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously.
+	 * Therefore, 1.0 multiplier = 80 nits for SDR content.  So if you
+	 * want, 203 nits for SDR content, pass in (203.0 / 80.0).  Format is
+	 * S31.32 sign-magnitude.
+	 *
+	 * HDR multiplier can wide range beyond [0.0, 1.0]. This means that PQ
+	 * TF is needed for any subsequent linear-to-non-linear transforms.
+	 */
+	__u64 hdr_mult;
+	/**
+	 * @ctm:
+	 *
+	 * Color transformation matrix. The blob (if not NULL) is a &struct
+	 * drm_color_ctm_3x4.
+	 */
+	struct drm_property_blob *ctm;
+	/**
+	 * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an
+	 * array of &struct drm_color_lut.
+	 */
+	struct drm_property_blob *shaper_lut;
+	/**
+	 * @shaper_tf:
+	 *
+	 * Predefined transfer function to delinearize color space.
+	 */
+	enum amdgpu_transfer_function shaper_tf;
+	/**
+	 * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of
+	 * &struct drm_color_lut.
+	 */
+	struct drm_property_blob *lut3d;
+	/**
+	 * @blend_lut: blend lut lookup table blob. The blob (if not NULL) is an
+	 * array of &struct drm_color_lut.
+	 */
+	struct drm_property_blob *blend_lut;
+	/**
+	 * @blend_tf:
+	 *
+	 * Pre-defined transfer function for converting plane pixel data before
+	 * applying blend LUT.
+	 */
+	enum amdgpu_transfer_function blend_tf;
 };
 
 struct dm_crtc_state {
@@ -743,6 +843,14 @@ struct dm_crtc_state {
 	struct dc_info_packet vrr_infopacket;
 
 	int abm_level;
+
+	/**
+	 * @regamma_tf:
+	 *
+	 * Pre-defined transfer function for converting internal FB -> wire
+	 * encoding.
+	 */
+	enum amdgpu_transfer_function regamma_tf;
 };
 
 #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base)
@@ -804,14 +912,22 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
 
 void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
 
+/* 3D LUT max size is 17x17x17 (4913 entries) */
+#define MAX_COLOR_3DLUT_SIZE 17
+#define MAX_COLOR_3DLUT_BITDEPTH 12
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+				struct drm_plane_state *plane_state);
+/* 1D LUT size */
 #define MAX_COLOR_LUT_ENTRIES 4096
 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */
 #define MAX_COLOR_LEGACY_LUT_ENTRIES 256
 
 void amdgpu_dm_init_color_mod(void);
+int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
 int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
 int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
 int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+				      struct drm_plane_state *plane_state,
 				      struct dc_plane_state *dc_plane_state);
 
 void amdgpu_dm_update_connector_after_detect(
@@ -834,7 +950,7 @@ struct dc_stream_state *
 int dm_atomic_get_state(struct drm_atomic_state *state,
 			struct dm_atomic_state **dm_state);
 
-struct amdgpu_dm_connector *
+struct drm_connector *
 amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
 					     struct drm_crtc *crtc);
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index a4cb23d059bd..9b527bffe11a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -72,6 +72,7 @@
  */
 
 #define MAX_DRM_LUT_VALUE 0xFFFF
+#define SDR_WHITE_LEVEL_INIT_VALUE 80
 
 /**
  * amdgpu_dm_init_color_mod - Initialize the color module.
@@ -84,6 +85,247 @@ void amdgpu_dm_init_color_mod(void)
 	setup_x_points_distribution();
 }
 
+static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x)
+{
+	struct fixed31_32 val;
+
+	/* If negative, convert to 2's complement. */
+	if (x & (1ULL << 63))
+		x = -(x & ~(1ULL << 63));
+
+	val.value = x;
+	return val;
+}
+
+#ifdef AMD_PRIVATE_COLOR
+/* Pre-defined Transfer Functions (TF)
+ *
+ * AMD driver supports pre-defined mathematical functions for transferring
+ * between encoded values and optical/linear space. Depending on HW color caps,
+ * ROMs and curves built by the AMD color module support these transforms.
+ *
+ * The driver-specific color implementation exposes properties for pre-blending
+ * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and
+ * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma
+ * supports ROM curves. AMD color module uses pre-defined coefficients to build
+ * curves for the other blocks. What can be done by each color block is
+ * described by struct dpp_color_capsand struct mpc_color_caps.
+ *
+ * AMD driver-specific color API exposes the following pre-defined transfer
+ * functions:
+ *
+ * - Identity: linear/identity relationship between pixel value and
+ *   luminance value;
+ * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure power functions;
+ * - sRGB: 2.4: The piece-wise transfer function from IEC 61966-2-1:1999;
+ * - BT.709: has a linear segment in the bottom part and then a power function
+ *   with a 0.45 (~1/2.22) gamma for the rest of the range; standardized by
+ *   ITU-R BT.709-6;
+ * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range
+ *   capability of 0 to 10,000 nits; standardized by SMPTE ST 2084.
+ *
+ * The AMD color model is designed with an assumption that SDR (sRGB, BT.709,
+ * Gamma 2.2, etc.) peak white maps (normalized to 1.0 FP) to 80 nits in the PQ
+ * system. This has the implication that PQ EOTF (non-linear to linear) maps to
+ * [0.0..125.0] where 125.0 = 10,000 nits / 80 nits.
+ *
+ * Non-linear and linear forms are described in the table below:
+ *
+ * ┌───────────┬─────────────────────┬──────────────────────┐
+ * │           │     Non-linear      │   Linear             │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │      sRGB │ UNORM or [0.0, 1.0] │ [0.0, 1.0]           │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │     BT709 │ UNORM or [0.0, 1.0] │ [0.0, 1.0]           │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │ Gamma 2.x │ UNORM or [0.0, 1.0] │ [0.0, 1.0]           │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │        PQ │ UNORM or FP16 CCCS* │ [0.0, 125.0]         │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │  Identity │ UNORM or FP16 CCCS* │ [0.0, 1.0] or CCCS** │
+ * └───────────┴─────────────────────┴──────────────────────┘
+ * * CCCS: Windows canonical composition color space
+ * ** Respectively
+ *
+ * In the driver-specific API, color block names attached to TF properties
+ * suggest the intention regarding non-linear encoding pixel's luminance
+ * values. As some newer encodings don't use gamma curve, we make encoding and
+ * decoding explicit by defining an enum list of transfer functions supported
+ * in terms of EOTF and inverse EOTF, where:
+ *
+ * - EOTF (electro-optical transfer function): is the transfer function to go
+ *   from the encoded value to an optical (linear) value. De-gamma functions
+ *   traditionally do this.
+ * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go
+ *   from an optical/linear space (which might have been used for blending)
+ *   back to the encoded values. Gamma functions traditionally do this.
+ */
+static const char * const
+amdgpu_transfer_function_names[] = {
+	[AMDGPU_TRANSFER_FUNCTION_DEFAULT]		= "Default",
+	[AMDGPU_TRANSFER_FUNCTION_IDENTITY]		= "Identity",
+	[AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF]		= "sRGB EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF]	= "BT.709 inv_OETF",
+	[AMDGPU_TRANSFER_FUNCTION_PQ_EOTF]		= "PQ EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF]		= "Gamma 2.2 EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF]		= "Gamma 2.4 EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF]		= "Gamma 2.6 EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF]	= "sRGB inv_EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_BT709_OETF]		= "BT.709 OETF",
+	[AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF]		= "PQ inv_EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF]	= "Gamma 2.2 inv_EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF]	= "Gamma 2.4 inv_EOTF",
+	[AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF]	= "Gamma 2.6 inv_EOTF",
+};
+
+static const u32 amdgpu_eotf =
+	BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_PQ_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF);
+
+static const u32 amdgpu_inv_eotf =
+	BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_BT709_OETF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF) |
+	BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF);
+
+static struct drm_property *
+amdgpu_create_tf_property(struct drm_device *dev,
+			  const char *name,
+			  u32 supported_tf)
+{
+	u32 transfer_functions = supported_tf |
+				 BIT(AMDGPU_TRANSFER_FUNCTION_DEFAULT) |
+				 BIT(AMDGPU_TRANSFER_FUNCTION_IDENTITY);
+	struct drm_prop_enum_list enum_list[AMDGPU_TRANSFER_FUNCTION_COUNT];
+	int i, len;
+
+	len = 0;
+	for (i = 0; i < AMDGPU_TRANSFER_FUNCTION_COUNT; i++) {
+		if ((transfer_functions & BIT(i)) == 0)
+			continue;
+
+		enum_list[len].type = i;
+		enum_list[len].name = amdgpu_transfer_function_names[i];
+		len++;
+	}
+
+	return drm_property_create_enum(dev, DRM_MODE_PROP_ENUM,
+					name, enum_list, len);
+}
+
+int
+amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
+{
+	struct drm_property *prop;
+
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_DEGAMMA_LUT", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_degamma_lut_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 DRM_MODE_PROP_IMMUTABLE,
+					 "AMD_PLANE_DEGAMMA_LUT_SIZE",
+					 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_degamma_lut_size_property = prop;
+
+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
+					 "AMD_PLANE_DEGAMMA_TF",
+					 amdgpu_eotf);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_degamma_tf_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 0, "AMD_PLANE_HDR_MULT", 0, U64_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_hdr_mult_property = prop;
+
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_CTM", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_ctm_property = prop;
+
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_SHAPER_LUT", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_shaper_lut_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 DRM_MODE_PROP_IMMUTABLE,
+					 "AMD_PLANE_SHAPER_LUT_SIZE", 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_shaper_lut_size_property = prop;
+
+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
+					 "AMD_PLANE_SHAPER_TF",
+					 amdgpu_inv_eotf);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_shaper_tf_property = prop;
+
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_LUT3D", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_lut3d_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 DRM_MODE_PROP_IMMUTABLE,
+					 "AMD_PLANE_LUT3D_SIZE", 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_lut3d_size_property = prop;
+
+	prop = drm_property_create(adev_to_drm(adev),
+				   DRM_MODE_PROP_BLOB,
+				   "AMD_PLANE_BLEND_LUT", 0);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_blend_lut_property = prop;
+
+	prop = drm_property_create_range(adev_to_drm(adev),
+					 DRM_MODE_PROP_IMMUTABLE,
+					 "AMD_PLANE_BLEND_LUT_SIZE", 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_blend_lut_size_property = prop;
+
+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
+					 "AMD_PLANE_BLEND_TF",
+					 amdgpu_eotf);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.plane_blend_tf_property = prop;
+
+	prop = amdgpu_create_tf_property(adev_to_drm(adev),
+					 "AMD_CRTC_REGAMMA_TF",
+					 amdgpu_inv_eotf);
+	if (!prop)
+		return -ENOMEM;
+	adev->mode_info.regamma_tf_property = prop;
+
+	return 0;
+}
+#endif
+
 /**
  * __extract_blob_lut - Extracts the DRM lut and lut size from a blob.
  * @blob: DRM color mgmt property blob
@@ -182,7 +424,6 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut,
 static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
 				   struct fixed31_32 *matrix)
 {
-	int64_t val;
 	int i;
 
 	/*
@@ -201,12 +442,29 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
 		}
 
 		/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
-		val = ctm->matrix[i - (i / 4)];
-		/* If negative, convert to 2's complement. */
-		if (val & (1ULL << 63))
-			val = -(val & ~(1ULL << 63));
+		matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i - (i / 4)]);
+	}
+}
 
-		matrix[i].value = val;
+/**
+ * __drm_ctm_3x4_to_dc_matrix - converts a DRM CTM 3x4 to a DC CSC float matrix
+ * @ctm: DRM color transformation matrix with 3x4 dimensions
+ * @matrix: DC CSC float matrix
+ *
+ * The matrix needs to be a 3x4 (12 entry) matrix.
+ */
+static void __drm_ctm_3x4_to_dc_matrix(const struct drm_color_ctm_3x4 *ctm,
+				       struct fixed31_32 *matrix)
+{
+	int i;
+
+	/* The format provided is S31.32, using signed-magnitude representation.
+	 * Our fixed31_32 is also S31.32, but is using 2's complement. We have
+	 * to convert from signed-magnitude to 2's complement.
+	 */
+	for (i = 0; i < 12; i++) {
+		/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
+		matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i]);
 	}
 }
 
@@ -268,16 +526,18 @@ static int __set_output_tf(struct dc_transfer_func *func,
 	struct calculate_buffer cal_buffer = {0};
 	bool res;
 
-	ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
-
 	cal_buffer.buffer_index = -1;
 
-	gamma = dc_create_gamma();
-	if (!gamma)
-		return -ENOMEM;
+	if (lut_size) {
+		ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
 
-	gamma->num_entries = lut_size;
-	__drm_lut_to_dc_gamma(lut, gamma, false);
+		gamma = dc_create_gamma();
+		if (!gamma)
+			return -ENOMEM;
+
+		gamma->num_entries = lut_size;
+		__drm_lut_to_dc_gamma(lut, gamma, false);
+	}
 
 	if (func->tf == TRANSFER_FUNCTION_LINEAR) {
 		/*
@@ -285,27 +545,68 @@ static int __set_output_tf(struct dc_transfer_func *func,
 		 * on top of a linear input. But degamma params can be used
 		 * instead to simulate this.
 		 */
-		gamma->type = GAMMA_CUSTOM;
+		if (gamma)
+			gamma->type = GAMMA_CUSTOM;
 		res = mod_color_calculate_degamma_params(NULL, func,
-							gamma, true);
+							 gamma, gamma != NULL);
 	} else {
 		/*
 		 * Assume sRGB. The actual mapping will depend on whether the
 		 * input was legacy or not.
 		 */
-		gamma->type = GAMMA_CS_TFM_1D;
-		res = mod_color_calculate_regamma_params(func, gamma, false,
+		if (gamma)
+			gamma->type = GAMMA_CS_TFM_1D;
+		res = mod_color_calculate_regamma_params(func, gamma, gamma != NULL,
 							 has_rom, NULL, &cal_buffer);
 	}
 
-	dc_gamma_release(&gamma);
+	if (gamma)
+		dc_gamma_release(&gamma);
 
 	return res ? 0 : -ENOMEM;
 }
 
+static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
+					const struct drm_color_lut *regamma_lut,
+					uint32_t regamma_size, bool has_rom,
+					enum dc_transfer_func_predefined tf)
+{
+	struct dc_transfer_func *out_tf = stream->out_transfer_func;
+	int ret = 0;
+
+	if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) {
+		/*
+		 * CRTC RGM goes into RGM LUT.
+		 *
+		 * Note: there is no implicit sRGB regamma here. We are using
+		 * degamma calculation from color module to calculate the curve
+		 * from a linear base if gamma TF is not set. However, if gamma
+		 * TF (!= Linear) and LUT are set at the same time, we will use
+		 * regamma calculation, and the color module will combine the
+		 * pre-defined TF and the custom LUT values into the LUT that's
+		 * actually programmed.
+		 */
+		out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+		out_tf->tf = tf;
+		out_tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+		ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom);
+	} else {
+		/*
+		 * No CRTC RGM means we can just put the block into bypass
+		 * since we don't have any plane level adjustments using it.
+		 */
+		out_tf->type = TF_TYPE_BYPASS;
+		out_tf->tf = TRANSFER_FUNCTION_LINEAR;
+	}
+
+	return ret;
+}
+
 /**
  * __set_input_tf - calculates the input transfer function based on expected
  * input space.
+ * @caps: dc color capabilities
  * @func: transfer function
  * @lut: lookup table that defines the color space
  * @lut_size: size of respective lut.
@@ -313,27 +614,240 @@ static int __set_output_tf(struct dc_transfer_func *func,
  * Returns:
  * 0 in case of success. -ENOMEM if fails.
  */
-static int __set_input_tf(struct dc_transfer_func *func,
+static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func *func,
 			  const struct drm_color_lut *lut, uint32_t lut_size)
 {
 	struct dc_gamma *gamma = NULL;
 	bool res;
 
-	gamma = dc_create_gamma();
-	if (!gamma)
-		return -ENOMEM;
+	if (lut_size) {
+		gamma = dc_create_gamma();
+		if (!gamma)
+			return -ENOMEM;
 
-	gamma->type = GAMMA_CUSTOM;
-	gamma->num_entries = lut_size;
+		gamma->type = GAMMA_CUSTOM;
+		gamma->num_entries = lut_size;
+
+		__drm_lut_to_dc_gamma(lut, gamma, false);
+	}
 
-	__drm_lut_to_dc_gamma(lut, gamma, false);
+	res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != NULL);
 
-	res = mod_color_calculate_degamma_params(NULL, func, gamma, true);
-	dc_gamma_release(&gamma);
+	if (gamma)
+		dc_gamma_release(&gamma);
 
 	return res ? 0 : -ENOMEM;
 }
 
+static enum dc_transfer_func_predefined
+amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
+{
+	switch (tf) {
+	default:
+	case AMDGPU_TRANSFER_FUNCTION_DEFAULT:
+	case AMDGPU_TRANSFER_FUNCTION_IDENTITY:
+		return TRANSFER_FUNCTION_LINEAR;
+	case AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF:
+		return TRANSFER_FUNCTION_SRGB;
+	case AMDGPU_TRANSFER_FUNCTION_BT709_OETF:
+	case AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF:
+		return TRANSFER_FUNCTION_BT709;
+	case AMDGPU_TRANSFER_FUNCTION_PQ_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF:
+		return TRANSFER_FUNCTION_PQ;
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF:
+		return TRANSFER_FUNCTION_GAMMA22;
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF:
+		return TRANSFER_FUNCTION_GAMMA24;
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF:
+	case AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF:
+		return TRANSFER_FUNCTION_GAMMA26;
+	}
+}
+
+static void __to_dc_lut3d_color(struct dc_rgb *rgb,
+				const struct drm_color_lut lut,
+				int bit_precision)
+{
+	rgb->red = drm_color_lut_extract(lut.red, bit_precision);
+	rgb->green = drm_color_lut_extract(lut.green, bit_precision);
+	rgb->blue  = drm_color_lut_extract(lut.blue, bit_precision);
+}
+
+static void __drm_3dlut_to_dc_3dlut(const struct drm_color_lut *lut,
+				    uint32_t lut3d_size,
+				    struct tetrahedral_params *params,
+				    bool use_tetrahedral_9,
+				    int bit_depth)
+{
+	struct dc_rgb *lut0;
+	struct dc_rgb *lut1;
+	struct dc_rgb *lut2;
+	struct dc_rgb *lut3;
+	int lut_i, i;
+
+
+	if (use_tetrahedral_9) {
+		lut0 = params->tetrahedral_9.lut0;
+		lut1 = params->tetrahedral_9.lut1;
+		lut2 = params->tetrahedral_9.lut2;
+		lut3 = params->tetrahedral_9.lut3;
+	} else {
+		lut0 = params->tetrahedral_17.lut0;
+		lut1 = params->tetrahedral_17.lut1;
+		lut2 = params->tetrahedral_17.lut2;
+		lut3 = params->tetrahedral_17.lut3;
+	}
+
+	for (lut_i = 0, i = 0; i < lut3d_size - 4; lut_i++, i += 4) {
+		/*
+		 * We should consider the 3D LUT RGB values are distributed
+		 * along four arrays lut0-3 where the first sizes 1229 and the
+		 * other 1228. The bit depth supported for 3dlut channel is
+		 * 12-bit, but DC also supports 10-bit.
+		 *
+		 * TODO: improve color pipeline API to enable the userspace set
+		 * bit depth and 3D LUT size/stride, as specified by VA-API.
+		 */
+		__to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth);
+		__to_dc_lut3d_color(&lut1[lut_i], lut[i + 1], bit_depth);
+		__to_dc_lut3d_color(&lut2[lut_i], lut[i + 2], bit_depth);
+		__to_dc_lut3d_color(&lut3[lut_i], lut[i + 3], bit_depth);
+	}
+	/* lut0 has 1229 points (lut_size/4 + 1) */
+	__to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth);
+}
+
+/* amdgpu_dm_atomic_lut3d - set DRM 3D LUT to DC stream
+ * @drm_lut3d: user 3D LUT
+ * @drm_lut3d_size: size of 3D LUT
+ * @lut3d: DC 3D LUT
+ *
+ * Map user 3D LUT data to DC 3D LUT and all necessary bits to program it
+ * on DCN accordingly.
+ */
+static void amdgpu_dm_atomic_lut3d(const struct drm_color_lut *drm_lut3d,
+				   uint32_t drm_lut3d_size,
+				   struct dc_3dlut *lut)
+{
+	if (!drm_lut3d_size) {
+		lut->state.bits.initialized = 0;
+	} else {
+		/* Stride and bit depth are not programmable by API yet.
+		 * Therefore, only supports 17x17x17 3D LUT (12-bit).
+		 */
+		lut->lut_3d.use_tetrahedral_9 = false;
+		lut->lut_3d.use_12bits = true;
+		lut->state.bits.initialized = 1;
+		__drm_3dlut_to_dc_3dlut(drm_lut3d, drm_lut3d_size, &lut->lut_3d,
+					lut->lut_3d.use_tetrahedral_9,
+					MAX_COLOR_3DLUT_BITDEPTH);
+	}
+}
+
+static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut,
+				       bool has_rom,
+				       enum dc_transfer_func_predefined tf,
+				       uint32_t shaper_size,
+				       struct dc_transfer_func *func_shaper)
+{
+	int ret = 0;
+
+	if (shaper_size || tf != TRANSFER_FUNCTION_LINEAR) {
+		/*
+		 * If user shaper LUT is set, we assume a linear color space
+		 * (linearized by degamma 1D LUT or not).
+		 */
+		func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS;
+		func_shaper->tf = tf;
+		func_shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+		ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, has_rom);
+	} else {
+		func_shaper->type = TF_TYPE_BYPASS;
+		func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
+	}
+
+	return ret;
+}
+
+static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut,
+				       bool has_rom,
+				       enum dc_transfer_func_predefined tf,
+				       uint32_t blend_size,
+				       struct dc_transfer_func *func_blend)
+{
+	int ret = 0;
+
+	if (blend_size || tf != TRANSFER_FUNCTION_LINEAR) {
+		/*
+		 * DRM plane gamma LUT or TF means we are linearizing color
+		 * space before blending (similar to degamma programming). As
+		 * we don't have hardcoded curve support, or we use AMD color
+		 * module to fill the parameters that will be translated to HW
+		 * points.
+		 */
+		func_blend->type = TF_TYPE_DISTRIBUTED_POINTS;
+		func_blend->tf = tf;
+		func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+		ret = __set_input_tf(NULL, func_blend, blend_lut, blend_size);
+	} else {
+		func_blend->type = TF_TYPE_BYPASS;
+		func_blend->tf = TRANSFER_FUNCTION_LINEAR;
+	}
+
+	return ret;
+}
+
+/**
+ * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user
+ * shaper and 3D LUTs match the hw supported size
+ * @adev: amdgpu device
+ * @plane_state: the DRM plane state
+ *
+ * Verifies if pre-blending (DPP) 3D LUT is supported by the HW (DCN 2.0 or
+ * newer) and if the user shaper and 3D LUTs match the supported size.
+ *
+ * Returns:
+ * 0 on success. -EINVAL if lut size are invalid.
+ */
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+				struct drm_plane_state *plane_state)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	const struct drm_color_lut *shaper = NULL, *lut3d = NULL;
+	uint32_t exp_size, size, dim_size = MAX_COLOR_3DLUT_SIZE;
+	bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut;
+
+	/* shaper LUT is only available if 3D LUT color caps */
+	exp_size = has_3dlut ? MAX_COLOR_LUT_ENTRIES : 0;
+	shaper = __extract_blob_lut(dm_plane_state->shaper_lut, &size);
+
+	if (shaper && size != exp_size) {
+		drm_dbg(&adev->ddev,
+			"Invalid Shaper LUT size. Should be %u but got %u.\n",
+			exp_size, size);
+		return -EINVAL;
+	}
+
+	/* The number of 3D LUT entries is the dimension size cubed */
+	exp_size = has_3dlut ? dim_size * dim_size * dim_size : 0;
+	lut3d = __extract_blob_lut(dm_plane_state->lut3d, &size);
+
+	if (lut3d && size != exp_size) {
+		drm_dbg(&adev->ddev,
+			"Invalid 3D LUT size. Should be %u but got %u.\n",
+			exp_size, size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes
  * @crtc_state: the DRM CRTC state
@@ -401,9 +915,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
 	const struct drm_color_lut *degamma_lut, *regamma_lut;
 	uint32_t degamma_size, regamma_size;
 	bool has_regamma, has_degamma;
+	enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_LINEAR;
 	bool is_legacy;
 	int r;
 
+	tf = amdgpu_tf_to_dc_tf(crtc->regamma_tf);
+
 	r = amdgpu_dm_verify_lut_sizes(&crtc->base);
 	if (r)
 		return r;
@@ -439,27 +956,23 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
 		crtc->cm_is_degamma_srgb = true;
 		stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
 		stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
-
+		/*
+		 * Note: although we pass has_rom as parameter here, we never
+		 * actually use ROM because the color module only takes the ROM
+		 * path if transfer_func->type == PREDEFINED.
+		 *
+		 * See more in mod_color_calculate_regamma_params()
+		 */
 		r = __set_legacy_tf(stream->out_transfer_func, regamma_lut,
 				    regamma_size, has_rom);
 		if (r)
 			return r;
-	} else if (has_regamma) {
-		/* If atomic regamma, CRTC RGM goes into RGM LUT. */
-		stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
-		stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
-
-		r = __set_output_tf(stream->out_transfer_func, regamma_lut,
-				    regamma_size, has_rom);
+	} else {
+		regamma_size = has_regamma ? regamma_size : 0;
+		r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut,
+						 regamma_size, has_rom, tf);
 		if (r)
 			return r;
-	} else {
-		/*
-		 * No CRTC RGM means we can just put the block into bypass
-		 * since we don't have any plane level adjustments using it.
-		 */
-		stream->out_transfer_func->type = TF_TYPE_BYPASS;
-		stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
 	}
 
 	/*
@@ -495,20 +1008,10 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
 	return 0;
 }
 
-/**
- * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
- * @crtc: amdgpu_dm crtc state
- * @dc_plane_state: target DC surface
- *
- * Update the underlying dc_stream_state's input transfer function (ITF) in
- * preparation for hardware commit. The transfer function used depends on
- * the preparation done on the stream for color management.
- *
- * Returns:
- * 0 on success. -ENOMEM if mem allocation fails.
- */
-int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
-				      struct dc_plane_state *dc_plane_state)
+static int
+map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
+			     struct dc_plane_state *dc_plane_state,
+			     struct dc_color_caps *caps)
 {
 	const struct drm_color_lut *degamma_lut;
 	enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
@@ -531,8 +1034,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 						 &degamma_size);
 		ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
 
-		dc_plane_state->in_transfer_func->type =
-			TF_TYPE_DISTRIBUTED_POINTS;
+		dc_plane_state->in_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
 
 		/*
 		 * This case isn't fully correct, but also fairly
@@ -564,11 +1066,11 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 			dc_plane_state->in_transfer_func->tf =
 				TRANSFER_FUNCTION_LINEAR;
 
-		r = __set_input_tf(dc_plane_state->in_transfer_func,
+		r = __set_input_tf(caps, dc_plane_state->in_transfer_func,
 				   degamma_lut, degamma_size);
 		if (r)
 			return r;
-	} else if (crtc->cm_is_degamma_srgb) {
+	} else {
 		/*
 		 * For legacy gamma support we need the regamma input
 		 * in linear space. Assume that the input is sRGB.
@@ -577,14 +1079,209 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 		dc_plane_state->in_transfer_func->tf = tf;
 
 		if (tf != TRANSFER_FUNCTION_SRGB &&
-		    !mod_color_calculate_degamma_params(NULL,
-			    dc_plane_state->in_transfer_func, NULL, false))
+		    !mod_color_calculate_degamma_params(caps,
+							dc_plane_state->in_transfer_func,
+							NULL, false))
 			return -ENOMEM;
-	} else {
-		/* ...Otherwise we can just bypass the DGM block. */
-		dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
-		dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
 	}
 
 	return 0;
 }
+
+static int
+__set_dm_plane_degamma(struct drm_plane_state *plane_state,
+		       struct dc_plane_state *dc_plane_state,
+		       struct dc_color_caps *color_caps)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	const struct drm_color_lut *degamma_lut;
+	enum amdgpu_transfer_function tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	uint32_t degamma_size;
+	bool has_degamma_lut;
+	int ret;
+
+	degamma_lut = __extract_blob_lut(dm_plane_state->degamma_lut,
+					 &degamma_size);
+
+	has_degamma_lut = degamma_lut &&
+			  !__is_lut_linear(degamma_lut, degamma_size);
+
+	tf = dm_plane_state->degamma_tf;
+
+	/* If we don't have plane degamma LUT nor TF to set on DC, we have
+	 * nothing to do here, return.
+	 */
+	if (!has_degamma_lut && tf == AMDGPU_TRANSFER_FUNCTION_DEFAULT)
+		return -EINVAL;
+
+	dc_plane_state->in_transfer_func->tf = amdgpu_tf_to_dc_tf(tf);
+
+	if (has_degamma_lut) {
+		ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
+
+		dc_plane_state->in_transfer_func->type =
+			TF_TYPE_DISTRIBUTED_POINTS;
+
+		ret = __set_input_tf(color_caps, dc_plane_state->in_transfer_func,
+				     degamma_lut, degamma_size);
+		if (ret)
+			return ret;
+       } else {
+		dc_plane_state->in_transfer_func->type =
+			TF_TYPE_PREDEFINED;
+
+		if (!mod_color_calculate_degamma_params(color_caps,
+		    dc_plane_state->in_transfer_func, NULL, false))
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+static int
+amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
+				     struct dc_plane_state *dc_plane_state)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	enum amdgpu_transfer_function shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	enum amdgpu_transfer_function blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	const struct drm_color_lut *shaper_lut, *lut3d, *blend_lut;
+	uint32_t shaper_size, lut3d_size, blend_size;
+	int ret;
+
+	dc_plane_state->hdr_mult = amdgpu_dm_fixpt_from_s3132(dm_plane_state->hdr_mult);
+
+	shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size);
+	shaper_size = shaper_lut != NULL ? shaper_size : 0;
+	shaper_tf = dm_plane_state->shaper_tf;
+	lut3d = __extract_blob_lut(dm_plane_state->lut3d, &lut3d_size);
+	lut3d_size = lut3d != NULL ? lut3d_size : 0;
+
+	amdgpu_dm_atomic_lut3d(lut3d, lut3d_size, dc_plane_state->lut3d_func);
+	ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false,
+					  amdgpu_tf_to_dc_tf(shaper_tf),
+					  shaper_size,
+					  dc_plane_state->in_shaper_func);
+	if (ret) {
+		drm_dbg_kms(plane_state->plane->dev,
+			    "setting plane %d shaper LUT failed.\n",
+			    plane_state->plane->index);
+
+		return ret;
+	}
+
+	blend_tf = dm_plane_state->blend_tf;
+	blend_lut = __extract_blob_lut(dm_plane_state->blend_lut, &blend_size);
+	blend_size = blend_lut != NULL ? blend_size : 0;
+
+	ret = amdgpu_dm_atomic_blend_lut(blend_lut, false,
+					 amdgpu_tf_to_dc_tf(blend_tf),
+					 blend_size, dc_plane_state->blend_tf);
+	if (ret) {
+		drm_dbg_kms(plane_state->plane->dev,
+			    "setting plane %d gamma lut failed.\n",
+			    plane_state->plane->index);
+
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
+ * @crtc: amdgpu_dm crtc state
+ * @plane_state: DRM plane state
+ * @dc_plane_state: target DC surface
+ *
+ * Update the underlying dc_stream_state's input transfer function (ITF) in
+ * preparation for hardware commit. The transfer function used depends on
+ * the preparation done on the stream for color management.
+ *
+ * Returns:
+ * 0 on success. -ENOMEM if mem allocation fails.
+ */
+int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+				      struct drm_plane_state *plane_state,
+				      struct dc_plane_state *dc_plane_state)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+	struct drm_color_ctm_3x4 *ctm = NULL;
+	struct dc_color_caps *color_caps = NULL;
+	bool has_crtc_cm_degamma;
+	int ret;
+
+	ret = amdgpu_dm_verify_lut3d_size(adev, plane_state);
+	if (ret) {
+		drm_dbg_driver(&adev->ddev, "amdgpu_dm_verify_lut3d_size() failed\n");
+		return ret;
+	}
+
+	if (dc_plane_state->ctx && dc_plane_state->ctx->dc)
+		color_caps = &dc_plane_state->ctx->dc->caps.color;
+
+	/* Initially, we can just bypass the DGM block. */
+	dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
+	dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
+
+	/* After, we start to update values according to color props */
+	has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb);
+
+	ret = __set_dm_plane_degamma(plane_state, dc_plane_state, color_caps);
+	if (ret == -ENOMEM)
+		return ret;
+
+	/* We only have one degamma block available (pre-blending) for the
+	 * whole color correction pipeline, so that we can't actually perform
+	 * plane and CRTC degamma at the same time. Explicitly reject atomic
+	 * updates when userspace sets both plane and CRTC degamma properties.
+	 */
+	if (has_crtc_cm_degamma && ret != -EINVAL) {
+		drm_dbg_kms(crtc->base.crtc->dev,
+			    "doesn't support plane and CRTC degamma at the same time\n");
+			return -EINVAL;
+	}
+
+	/* If we are here, it means we don't have plane degamma settings, check
+	 * if we have CRTC degamma waiting for mapping to pre-blending degamma
+	 * block
+	 */
+	if (has_crtc_cm_degamma) {
+		/*
+		 * AMD HW doesn't have post-blending degamma caps. When DRM
+		 * CRTC atomic degamma is set, we maps it to DPP degamma block
+		 * (pre-blending) or, on legacy gamma, we use DPP degamma to
+		 * linearize (implicit degamma) from sRGB/BT709 according to
+		 * the input space.
+		 */
+		ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state, color_caps);
+		if (ret)
+			return ret;
+	}
+
+	/* Setup CRTC CTM. */
+	if (dm_plane_state->ctm) {
+		ctm = (struct drm_color_ctm_3x4 *)dm_plane_state->ctm->data;
+		/*
+		 * DCN2 and older don't support both pre-blending and
+		 * post-blending gamut remap. For this HW family, if we have
+		 * the plane and CRTC CTMs simultaneously, CRTC CTM takes
+		 * priority, and we discard plane CTM, as implemented in
+		 * dcn10_program_gamut_remap(). However, DCN3+ has DPP
+		 * (pre-blending) and MPC (post-blending) `gamut remap` blocks;
+		 * therefore, we can program plane and CRTC CTMs together by
+		 * mapping CRTC CTM to MPC and keeping plane CTM setup at DPP,
+		 * as it's done by dcn30_program_gamut_remap().
+		 */
+		__drm_ctm_3x4_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix);
+
+		dc_plane_state->gamut_remap_matrix.enable_remap = true;
+		dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
+	} else {
+		/* Bypass CTM. */
+		dc_plane_state->gamut_remap_matrix.enable_remap = false;
+		dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
+	}
+
+	return amdgpu_dm_plane_set_color_properties(plane_state, dc_plane_state);
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
index 52ecfa746b54..f936a35fa9eb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
@@ -326,6 +326,9 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
 			if (!connector->state || connector->state->crtc != crtc)
 				continue;
 
+			if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+				continue;
+
 			aconn = to_amdgpu_dm_connector(connector);
 			break;
 		}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index cb0b48bb2a7d..6e715ef3a556 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -29,7 +29,6 @@
 #include "dc.h"
 #include "amdgpu.h"
 #include "amdgpu_dm_psr.h"
-#include "amdgpu_dm_replay.h"
 #include "amdgpu_dm_crtc.h"
 #include "amdgpu_dm_plane.h"
 #include "amdgpu_dm_trace.h"
@@ -124,12 +123,7 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work)
 	 * fill_dc_dirty_rects().
 	 */
 	if (vblank_work->stream && vblank_work->stream->link) {
-		/*
-		 * Prioritize replay, instead of psr
-		 */
-		if (vblank_work->stream->link->replay_settings.replay_feature_enabled)
-			amdgpu_dm_replay_enable(vblank_work->stream, false);
-		else if (vblank_work->enable) {
+		if (vblank_work->enable) {
 			if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 &&
 			    vblank_work->stream->link->psr_settings.psr_allow_active)
 				amdgpu_dm_psr_disable(vblank_work->stream);
@@ -138,7 +132,6 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work)
 #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
 			   !amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base) &&
 #endif
-			   vblank_work->stream->link->panel_config.psr.disallow_replay &&
 			   vblank_work->acrtc->dm_irq_params.allow_psr_entry) {
 			amdgpu_dm_psr_enable(vblank_work->stream);
 		}
@@ -260,6 +253,7 @@ static struct drm_crtc_state *amdgpu_dm_crtc_duplicate_state(struct drm_crtc *cr
 	state->freesync_config = cur->freesync_config;
 	state->cm_has_degamma = cur->cm_has_degamma;
 	state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb;
+	state->regamma_tf = cur->regamma_tf;
 	state->crc_skip_count = cur->crc_skip_count;
 	state->mpo_requested = cur->mpo_requested;
 	/* TODO Duplicate dc_stream after objects are stream object is flattened */
@@ -296,6 +290,70 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc)
 }
 #endif
 
+#ifdef AMD_PRIVATE_COLOR
+/**
+ * dm_crtc_additional_color_mgmt - enable additional color properties
+ * @crtc: DRM CRTC
+ *
+ * This function lets the driver enable post-blending CRTC regamma transfer
+ * function property in addition to DRM CRTC gamma LUT. Default value means
+ * linear transfer function, which is the default CRTC gamma LUT behaviour
+ * without this property.
+ */
+static void
+dm_crtc_additional_color_mgmt(struct drm_crtc *crtc)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+	if (adev->dm.dc->caps.color.mpc.ogam_ram)
+		drm_object_attach_property(&crtc->base,
+					   adev->mode_info.regamma_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+}
+
+static int
+amdgpu_dm_atomic_crtc_set_property(struct drm_crtc *crtc,
+				   struct drm_crtc_state *state,
+				   struct drm_property *property,
+				   uint64_t val)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+	struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state);
+
+	if (property == adev->mode_info.regamma_tf_property) {
+		if (acrtc_state->regamma_tf != val) {
+			acrtc_state->regamma_tf = val;
+			acrtc_state->base.color_mgmt_changed |= 1;
+		}
+	} else {
+		drm_dbg_atomic(crtc->dev,
+			       "[CRTC:%d:%s] unknown property [PROP:%d:%s]]\n",
+			       crtc->base.id, crtc->name,
+			       property->base.id, property->name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+amdgpu_dm_atomic_crtc_get_property(struct drm_crtc *crtc,
+				   const struct drm_crtc_state *state,
+				   struct drm_property *property,
+				   uint64_t *val)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+	struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state);
+
+	if (property == adev->mode_info.regamma_tf_property)
+		*val = acrtc_state->regamma_tf;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+#endif
+
 /* Implemented only the options currently available for the driver */
 static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
 	.reset = amdgpu_dm_crtc_reset_state,
@@ -314,6 +372,10 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
 #if defined(CONFIG_DEBUG_FS)
 	.late_register = amdgpu_dm_crtc_late_register,
 #endif
+#ifdef AMD_PRIVATE_COLOR
+	.atomic_set_property = amdgpu_dm_atomic_crtc_set_property,
+	.atomic_get_property = amdgpu_dm_atomic_crtc_get_property,
+#endif
 };
 
 static void amdgpu_dm_crtc_helper_disable(struct drm_crtc *crtc)
@@ -489,6 +551,9 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
 
 	drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
 
+#ifdef AMD_PRIVATE_COLOR
+	dm_crtc_additional_color_mgmt(&acrtc->base);
+#endif
 	return 0;
 
 fail:
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 13a177d34376..68a846323912 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -2971,6 +2971,85 @@ static int allow_edp_hotplug_detection_set(void *data, u64 val)
 	return 0;
 }
 
+static int dmub_trace_mask_set(void *data, u64 val)
+{
+	struct amdgpu_device *adev = data;
+	struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
+	enum dmub_gpint_command cmd;
+	u64 mask = 0xffff;
+	u8 shift = 0;
+	u32 res;
+	int i;
+
+	if (!srv->fw_version)
+		return -EINVAL;
+
+	for (i = 0;  i < 4; i++) {
+		res = (val & mask) >> shift;
+
+		switch (i) {
+		case 0:
+			cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0;
+			break;
+		case 1:
+			cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1;
+			break;
+		case 2:
+			cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2;
+			break;
+		case 3:
+			cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3;
+			break;
+		}
+
+		if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmd, res, NULL, DM_DMUB_WAIT_TYPE_WAIT))
+			return -EIO;
+
+		usleep_range(100, 1000);
+
+		mask <<= 16;
+		shift += 16;
+	}
+
+	return 0;
+}
+
+static int dmub_trace_mask_show(void *data, u64 *val)
+{
+	enum dmub_gpint_command cmd = DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0;
+	struct amdgpu_device *adev = data;
+	struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
+	u8 shift = 0;
+	u64 raw = 0;
+	u64 res = 0;
+	int i = 0;
+
+	if (!srv->fw_version)
+		return -EINVAL;
+
+	while (i < 4) {
+		uint32_t response;
+
+		if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmd, 0, &response, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+			return -EIO;
+
+		raw = response;
+		usleep_range(100, 1000);
+
+		cmd++;
+		res |= (raw << shift);
+		shift += 16;
+		i++;
+	}
+
+	*val = res;
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(dmub_trace_mask_fops, dmub_trace_mask_show,
+			 dmub_trace_mask_set, "0x%llx\n");
+
 /*
  * Set dmcub trace event IRQ enable or disable.
  * Usage to enable dmcub trace event IRQ: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
@@ -3647,12 +3726,16 @@ static int capabilities_show(struct seq_file *m, void *unused)
 	bool mall_supported = dc->caps.mall_size_total;
 	bool subvp_supported = dc->caps.subvp_fw_processing_delay_us;
 	unsigned int mall_in_use = false;
-	unsigned int subvp_in_use = dc->cap_funcs.get_subvp_en(dc, dc->current_state);
+	unsigned int subvp_in_use = false;
+
 	struct hubbub *hubbub = dc->res_pool->hubbub;
 
 	if (hubbub->funcs->get_mall_en)
 		hubbub->funcs->get_mall_en(hubbub, &mall_in_use);
 
+	if (dc->cap_funcs.get_subvp_en)
+		subvp_in_use = dc->cap_funcs.get_subvp_en(dc, dc->current_state);
+
 	seq_printf(m, "mall supported: %s, enabled: %s\n",
 			   mall_supported ? "yes" : "no", mall_in_use ? "yes" : "no");
 	seq_printf(m, "sub-viewport supported: %s, enabled: %s\n",
@@ -3880,6 +3963,9 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
 	debugfs_create_file_unsafe("amdgpu_dm_force_timing_sync", 0644, root,
 				   adev, &force_timing_sync_ops);
 
+	debugfs_create_file_unsafe("amdgpu_dm_dmub_trace_mask", 0644, root,
+				   adev, &dmub_trace_mask_fops);
+
 	debugfs_create_file_unsafe("amdgpu_dm_dmcub_trace_event_en", 0644, root,
 				   adev, &dmcub_trace_event_state_fops);
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index eb6121ad92fd..eaf8d9f48244 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -64,6 +64,12 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps)
 		DRM_DEBUG_DRIVER("Disabling FAMS on monitor with panel id %X\n", panel_id);
 		edid_caps->panel_patch.disable_fams = true;
 		break;
+	/* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */
+	case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB):
+	case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B):
+		DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id);
+		edid_caps->panel_patch.remove_sink_ext_caps = true;
+		break;
 	default:
 		return;
 	}
@@ -334,15 +340,14 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger(
 	return ACT_SUCCESS;
 }
 
-bool dm_helpers_dp_mst_send_payload_allocation(
+void dm_helpers_dp_mst_send_payload_allocation(
 		struct dc_context *ctx,
-		const struct dc_stream_state *stream,
-		bool enable)
+		const struct dc_stream_state *stream)
 {
 	struct amdgpu_dm_connector *aconnector;
 	struct drm_dp_mst_topology_state *mst_state;
 	struct drm_dp_mst_topology_mgr *mst_mgr;
-	struct drm_dp_mst_atomic_payload *new_payload, old_payload;
+	struct drm_dp_mst_atomic_payload *new_payload;
 	enum mst_progress_status set_flag = MST_ALLOCATE_NEW_PAYLOAD;
 	enum mst_progress_status clr_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
 	int ret = 0;
@@ -350,25 +355,13 @@ bool dm_helpers_dp_mst_send_payload_allocation(
 	aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
 
 	if (!aconnector || !aconnector->mst_root)
-		return false;
+		return;
 
 	mst_mgr = &aconnector->mst_root->mst_mgr;
 	mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
-
 	new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
 
-	if (!enable) {
-		set_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
-		clr_flag = MST_ALLOCATE_NEW_PAYLOAD;
-	}
-
-	if (enable) {
-		ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, new_payload);
-	} else {
-		dm_helpers_construct_old_payload(mst_mgr, mst_state,
-						 new_payload, &old_payload);
-		drm_dp_remove_payload_part2(mst_mgr, mst_state, &old_payload, new_payload);
-	}
+	ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, new_payload);
 
 	if (ret) {
 		amdgpu_dm_set_mst_status(&aconnector->mst_status,
@@ -379,10 +372,36 @@ bool dm_helpers_dp_mst_send_payload_allocation(
 		amdgpu_dm_set_mst_status(&aconnector->mst_status,
 			clr_flag, false);
 	}
-
-	return true;
 }
 
+void dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+		struct dc_context *ctx,
+		const struct dc_stream_state *stream)
+{
+	struct amdgpu_dm_connector *aconnector;
+	struct drm_dp_mst_topology_state *mst_state;
+	struct drm_dp_mst_topology_mgr *mst_mgr;
+	struct drm_dp_mst_atomic_payload *new_payload, old_payload;
+	enum mst_progress_status set_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
+	enum mst_progress_status clr_flag = MST_ALLOCATE_NEW_PAYLOAD;
+
+	aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+
+	if (!aconnector || !aconnector->mst_root)
+		return;
+
+	mst_mgr = &aconnector->mst_root->mst_mgr;
+	mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
+	new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
+	dm_helpers_construct_old_payload(mst_mgr, mst_state,
+					 new_payload, &old_payload);
+
+	drm_dp_remove_payload_part2(mst_mgr, mst_state, &old_payload, new_payload);
+
+	amdgpu_dm_set_mst_status(&aconnector->mst_status, set_flag, true);
+	amdgpu_dm_set_mst_status(&aconnector->mst_status, clr_flag, false);
+ }
+
 void dm_dtn_log_begin(struct dc_context *ctx,
 	struct dc_log_buffer_ctx *log_ctx)
 {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index 51467f132c26..58b880acb087 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -894,10 +894,15 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev)
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
-		struct amdgpu_dm_connector *amdgpu_dm_connector =
-				to_amdgpu_dm_connector(connector);
+		struct amdgpu_dm_connector *amdgpu_dm_connector;
+		const struct dc_link *dc_link;
 
-		const struct dc_link *dc_link = amdgpu_dm_connector->dc_link;
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+
+		dc_link = amdgpu_dm_connector->dc_link;
 
 		if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
 			dc_interrupt_set(adev->dm.dc,
@@ -930,9 +935,14 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev)
 
 	drm_connector_list_iter_begin(dev, &iter);
 	drm_for_each_connector_iter(connector, &iter) {
-		struct amdgpu_dm_connector *amdgpu_dm_connector =
-				to_amdgpu_dm_connector(connector);
-		const struct dc_link *dc_link = amdgpu_dm_connector->dc_link;
+		struct amdgpu_dm_connector *amdgpu_dm_connector;
+		const struct dc_link *dc_link;
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
+		amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+		dc_link = amdgpu_dm_connector->dc_link;
 
 		if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
 			dc_interrupt_set(adev->dm.dc,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 3608d520b227..941e96f100f4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -28,6 +28,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_fixed.h>
+#include <drm/drm_edid.h>
 #include "dm_services.h"
 #include "amdgpu.h"
 #include "amdgpu_dm.h"
@@ -45,7 +46,7 @@
 #include "amdgpu_dm_debugfs.h"
 #endif
 
-#include "dc/dcn20/dcn20_resource.h"
+#include "dc/resource/dcn20/dcn20_resource.h"
 
 #define PEAK_FACTOR_X1000 1006
 
@@ -425,8 +426,7 @@ dm_mst_atomic_best_encoder(struct drm_connector *connector,
 {
 	struct drm_connector_state *connector_state = drm_atomic_get_new_connector_state(state,
 											 connector);
-	struct drm_device *dev = connector->dev;
-	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct amdgpu_device *adev = drm_to_adev(connector->dev);
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(connector_state->crtc);
 
 	return &adev->dm.mst_encoders[acrtc->crtc_id].base;
@@ -1501,14 +1501,16 @@ int pre_validate_dsc(struct drm_atomic_state *state,
 		int ind = find_crtc_index_in_state_by_stream(state, stream);
 
 		if (ind >= 0) {
+			struct drm_connector *connector;
 			struct amdgpu_dm_connector *aconnector;
 			struct drm_connector_state *drm_new_conn_state;
 			struct dm_connector_state *dm_new_conn_state;
 			struct dm_crtc_state *dm_old_crtc_state;
 
-			aconnector =
+			connector =
 				amdgpu_dm_find_first_crtc_matching_connector(state,
 									     state->crtcs[ind].ptr);
+			aconnector = to_amdgpu_dm_connector(connector);
 			drm_new_conn_state =
 				drm_atomic_get_new_connector_state(state,
 								   &aconnector->base);
@@ -1603,9 +1605,8 @@ enum dc_status dm_dp_mst_is_port_support_mode(
 	struct dc_link_settings cur_link_settings;
 	unsigned int end_to_end_bw_in_kbps = 0;
 	unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0;
-	unsigned int max_compressed_bw_in_kbps = 0;
 	struct dc_dsc_bw_range bw_range = {0};
-	uint16_t full_pbn = aconnector->mst_output_port->full_pbn;
+	struct dc_dsc_config_options dsc_options = {0};
 
 	/*
 	 * Consider the case with the depth of the mst topology tree is equal or less than 2
@@ -1621,30 +1622,39 @@ enum dc_status dm_dp_mst_is_port_support_mode(
 	   (aconnector->mst_output_port->passthrough_aux ||
 	    aconnector->dsc_aux == &aconnector->mst_output_port->aux)) {
 		cur_link_settings = stream->link->verified_link_cap;
+		upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, &cur_link_settings);
+		down_link_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn);
 
-		upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
-							       &cur_link_settings);
-		down_link_bw_in_kbps = kbps_from_pbn(full_pbn);
-
-		/* pick the bottleneck */
-		end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps,
-					    down_link_bw_in_kbps);
-
-		/*
-		 * use the maximum dsc compression bandwidth as the required
-		 * bandwidth for the mode
-		 */
-		max_compressed_bw_in_kbps = bw_range.min_kbps;
+		/* pick the end to end bw bottleneck */
+		end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, down_link_bw_in_kbps);
 
-		if (end_to_end_bw_in_kbps < max_compressed_bw_in_kbps) {
-			DRM_DEBUG_DRIVER("Mode does not fit into DSC pass-through bandwidth validation\n");
+		if (end_to_end_bw_in_kbps < bw_range.min_kbps) {
+			DRM_DEBUG_DRIVER("maximum dsc compression cannot fit into end-to-end bandwidth\n");
 			return DC_FAIL_BANDWIDTH_VALIDATE;
 		}
+
+		if (end_to_end_bw_in_kbps < bw_range.stream_kbps) {
+			dc_dsc_get_default_config_option(stream->link->dc, &dsc_options);
+			dsc_options.max_target_bpp_limit_override_x16 = aconnector->base.display_info.max_dsc_bpp * 16;
+			if (dc_dsc_compute_config(stream->sink->ctx->dc->res_pool->dscs[0],
+					&stream->sink->dsc_caps.dsc_dec_caps,
+					&dsc_options,
+					end_to_end_bw_in_kbps,
+					&stream->timing,
+					dc_link_get_highest_encoding_format(stream->link),
+					&stream->timing.dsc_cfg)) {
+				stream->timing.flags.DSC = 1;
+				DRM_DEBUG_DRIVER("end-to-end bandwidth require dsc and dsc config found\n");
+			} else {
+				DRM_DEBUG_DRIVER("end-to-end bandwidth require dsc but dsc config not found\n");
+				return DC_FAIL_BANDWIDTH_VALIDATE;
+			}
+		}
 	} else {
 		/* check if mode could be supported within full_pbn */
 		bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3;
 		pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp << 4);
-		if (pbn > full_pbn)
+		if (pbn > aconnector->mst_output_port->full_pbn)
 			return DC_FAIL_BANDWIDTH_VALIDATE;
 	}
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 116121e647ca..8a4c40b4c27e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1337,8 +1337,14 @@ static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane)
 	amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL);
 	WARN_ON(amdgpu_state == NULL);
 
-	if (amdgpu_state)
-		__drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
+	if (!amdgpu_state)
+		return;
+
+	__drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
+	amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT;
+	amdgpu_state->shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+	amdgpu_state->blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
 }
 
 static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane)
@@ -1357,6 +1363,27 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct
 		dc_plane_state_retain(dm_plane_state->dc_state);
 	}
 
+	if (old_dm_plane_state->degamma_lut)
+		dm_plane_state->degamma_lut =
+			drm_property_blob_get(old_dm_plane_state->degamma_lut);
+	if (old_dm_plane_state->ctm)
+		dm_plane_state->ctm =
+			drm_property_blob_get(old_dm_plane_state->ctm);
+	if (old_dm_plane_state->shaper_lut)
+		dm_plane_state->shaper_lut =
+			drm_property_blob_get(old_dm_plane_state->shaper_lut);
+	if (old_dm_plane_state->lut3d)
+		dm_plane_state->lut3d =
+			drm_property_blob_get(old_dm_plane_state->lut3d);
+	if (old_dm_plane_state->blend_lut)
+		dm_plane_state->blend_lut =
+			drm_property_blob_get(old_dm_plane_state->blend_lut);
+
+	dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf;
+	dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult;
+	dm_plane_state->shaper_tf = old_dm_plane_state->shaper_tf;
+	dm_plane_state->blend_tf = old_dm_plane_state->blend_tf;
+
 	return &dm_plane_state->base;
 }
 
@@ -1424,12 +1451,206 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane,
 {
 	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
 
+	if (dm_plane_state->degamma_lut)
+		drm_property_blob_put(dm_plane_state->degamma_lut);
+	if (dm_plane_state->ctm)
+		drm_property_blob_put(dm_plane_state->ctm);
+	if (dm_plane_state->lut3d)
+		drm_property_blob_put(dm_plane_state->lut3d);
+	if (dm_plane_state->shaper_lut)
+		drm_property_blob_put(dm_plane_state->shaper_lut);
+	if (dm_plane_state->blend_lut)
+		drm_property_blob_put(dm_plane_state->blend_lut);
+
 	if (dm_plane_state->dc_state)
 		dc_plane_state_release(dm_plane_state->dc_state);
 
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
+#ifdef AMD_PRIVATE_COLOR
+static void
+dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
+					     struct drm_plane *plane)
+{
+	struct amdgpu_mode_info mode_info = dm->adev->mode_info;
+	struct dpp_color_caps dpp_color_caps = dm->dc->caps.color.dpp;
+
+	/* Check HW color pipeline capabilities on DPP block (pre-blending)
+	 * before exposing related properties.
+	 */
+	if (dpp_color_caps.dgam_ram || dpp_color_caps.gamma_corr) {
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_degamma_lut_property,
+					   0);
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_degamma_lut_size_property,
+					   MAX_COLOR_LUT_ENTRIES);
+		drm_object_attach_property(&plane->base,
+					   dm->adev->mode_info.plane_degamma_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+	}
+	/* HDR MULT is always available */
+	drm_object_attach_property(&plane->base,
+				   dm->adev->mode_info.plane_hdr_mult_property,
+				   AMDGPU_HDR_MULT_DEFAULT);
+
+	/* Only enable plane CTM if both DPP and MPC gamut remap is available. */
+	if (dm->dc->caps.color.mpc.gamut_remap)
+		drm_object_attach_property(&plane->base,
+					   dm->adev->mode_info.plane_ctm_property, 0);
+
+	if (dpp_color_caps.hw_3d_lut) {
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_shaper_lut_property, 0);
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_shaper_lut_size_property,
+					   MAX_COLOR_LUT_ENTRIES);
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_shaper_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_lut3d_property, 0);
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_lut3d_size_property,
+					   MAX_COLOR_3DLUT_SIZE);
+	}
+
+	if (dpp_color_caps.ogam_ram) {
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_blend_lut_property, 0);
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_blend_lut_size_property,
+					   MAX_COLOR_LUT_ENTRIES);
+		drm_object_attach_property(&plane->base,
+					   mode_info.plane_blend_tf_property,
+					   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+	}
+}
+
+static int
+dm_atomic_plane_set_property(struct drm_plane *plane,
+			     struct drm_plane_state *state,
+			     struct drm_property *property,
+			     uint64_t val)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+	bool replaced = false;
+	int ret;
+
+	if (property == adev->mode_info.plane_degamma_lut_property) {
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->degamma_lut,
+							val, -1,
+							sizeof(struct drm_color_lut),
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
+	} else if (property == adev->mode_info.plane_degamma_tf_property) {
+		if (dm_plane_state->degamma_tf != val) {
+			dm_plane_state->degamma_tf = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
+	} else if (property == adev->mode_info.plane_hdr_mult_property) {
+		if (dm_plane_state->hdr_mult != val) {
+			dm_plane_state->hdr_mult = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
+	} else if (property == adev->mode_info.plane_ctm_property) {
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->ctm,
+							val,
+							sizeof(struct drm_color_ctm_3x4), -1,
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
+	} else if (property == adev->mode_info.plane_shaper_lut_property) {
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->shaper_lut,
+							val, -1,
+							sizeof(struct drm_color_lut),
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
+	} else if (property == adev->mode_info.plane_shaper_tf_property) {
+		if (dm_plane_state->shaper_tf != val) {
+			dm_plane_state->shaper_tf = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
+	} else if (property == adev->mode_info.plane_lut3d_property) {
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->lut3d,
+							val, -1,
+							sizeof(struct drm_color_lut),
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
+	} else if (property == adev->mode_info.plane_blend_lut_property) {
+		ret = drm_property_replace_blob_from_id(plane->dev,
+							&dm_plane_state->blend_lut,
+							val, -1,
+							sizeof(struct drm_color_lut),
+							&replaced);
+		dm_plane_state->base.color_mgmt_changed |= replaced;
+		return ret;
+	} else if (property == adev->mode_info.plane_blend_tf_property) {
+		if (dm_plane_state->blend_tf != val) {
+			dm_plane_state->blend_tf = val;
+			dm_plane_state->base.color_mgmt_changed = 1;
+		}
+	} else {
+		drm_dbg_atomic(plane->dev,
+			       "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n",
+			       plane->base.id, plane->name,
+			       property->base.id, property->name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+dm_atomic_plane_get_property(struct drm_plane *plane,
+			     const struct drm_plane_state *state,
+			     struct drm_property *property,
+			     uint64_t *val)
+{
+	struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
+	struct amdgpu_device *adev = drm_to_adev(plane->dev);
+
+	if (property == adev->mode_info.plane_degamma_lut_property) {
+		*val = (dm_plane_state->degamma_lut) ?
+			dm_plane_state->degamma_lut->base.id : 0;
+	} else if (property == adev->mode_info.plane_degamma_tf_property) {
+		*val = dm_plane_state->degamma_tf;
+	} else if (property == adev->mode_info.plane_hdr_mult_property) {
+		*val = dm_plane_state->hdr_mult;
+	} else if (property == adev->mode_info.plane_ctm_property) {
+		*val = (dm_plane_state->ctm) ?
+			dm_plane_state->ctm->base.id : 0;
+	} else 	if (property == adev->mode_info.plane_shaper_lut_property) {
+		*val = (dm_plane_state->shaper_lut) ?
+			dm_plane_state->shaper_lut->base.id : 0;
+	} else if (property == adev->mode_info.plane_shaper_tf_property) {
+		*val = dm_plane_state->shaper_tf;
+	} else 	if (property == adev->mode_info.plane_lut3d_property) {
+		*val = (dm_plane_state->lut3d) ?
+			dm_plane_state->lut3d->base.id : 0;
+	} else 	if (property == adev->mode_info.plane_blend_lut_property) {
+		*val = (dm_plane_state->blend_lut) ?
+			dm_plane_state->blend_lut->base.id : 0;
+	} else if (property == adev->mode_info.plane_blend_tf_property) {
+		*val = dm_plane_state->blend_tf;
+
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#endif
+
 static const struct drm_plane_funcs dm_plane_funcs = {
 	.update_plane	= drm_atomic_helper_update_plane,
 	.disable_plane	= drm_atomic_helper_disable_plane,
@@ -1438,6 +1659,10 @@ static const struct drm_plane_funcs dm_plane_funcs = {
 	.atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state,
 	.atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state,
 	.format_mod_supported = amdgpu_dm_plane_format_mod_supported,
+#ifdef AMD_PRIVATE_COLOR
+	.atomic_set_property = dm_atomic_plane_set_property,
+	.atomic_get_property = dm_atomic_plane_get_property,
+#endif
 };
 
 int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
@@ -1517,6 +1742,9 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
 
 	drm_plane_helper_add(plane, &dm_plane_helper_funcs);
 
+#ifdef AMD_PRIVATE_COLOR
+	dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
+#endif
 	/* Create (reset) the plane state */
 	if (plane->funcs->reset)
 		plane->funcs->reset(plane);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
index 08ce3bb8f640..1f08c6564c3b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
@@ -51,6 +51,9 @@ static bool link_supports_psrsu(struct dc_link *link)
 	    !link->dpcd_caps.psr_info.psr2_su_y_granularity_cap)
 		return false;
 
+	if (amdgpu_dc_debug_mask & DC_DISABLE_PSR_SU)
+		return false;
+
 	return dc_dmub_check_min_version(dc->ctx->dmub_srv->dmub);
 }
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
new file mode 100644
index 000000000000..16e72d623630
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services_types.h"
+
+#include "amdgpu.h"
+#include "amdgpu_dm.h"
+#include "amdgpu_dm_wb.h"
+#include "amdgpu_display.h"
+#include "dc.h"
+
+#include <drm/drm_edid.h>
+#include <drm/drm_atomic_state_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
+
+static const u32 amdgpu_dm_wb_formats[] = {
+	DRM_FORMAT_XRGB2101010,
+};
+
+static int amdgpu_dm_wb_encoder_atomic_check(struct drm_encoder *encoder,
+					struct drm_crtc_state *crtc_state,
+					struct drm_connector_state *conn_state)
+{
+	struct drm_framebuffer *fb;
+	const struct drm_display_mode *mode = &crtc_state->mode;
+	bool found = false;
+	uint8_t i;
+
+	if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
+		return 0;
+
+	fb = conn_state->writeback_job->fb;
+	if (fb->width != mode->hdisplay || fb->height != mode->vdisplay) {
+		DRM_DEBUG_KMS("Invalid framebuffer size %ux%u\n",
+			      fb->width, fb->height);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < sizeof(amdgpu_dm_wb_formats) / sizeof(u32); i++) {
+		if (fb->format->format == amdgpu_dm_wb_formats[i])
+			found = true;
+	}
+
+	if (!found) {
+		DRM_DEBUG_KMS("Invalid pixel format %p4cc\n",
+			      &fb->format->format);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+
+	return drm_add_modes_noedid(connector, dev->mode_config.max_width,
+				    dev->mode_config.max_height);
+}
+
+static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector,
+			       struct drm_writeback_job *job)
+{
+	struct amdgpu_framebuffer *afb;
+	struct drm_gem_object *obj;
+	struct amdgpu_device *adev;
+	struct amdgpu_bo *rbo;
+	uint32_t domain;
+	int r;
+
+	if (!job->fb) {
+		DRM_DEBUG_KMS("No FB bound\n");
+		return 0;
+	}
+
+	afb = to_amdgpu_framebuffer(job->fb);
+	obj = job->fb->obj[0];
+	rbo = gem_to_amdgpu_bo(obj);
+	adev = amdgpu_ttm_adev(rbo->tbo.bdev);
+
+	r = amdgpu_bo_reserve(rbo, true);
+	if (r) {
+		dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
+		return r;
+	}
+
+	r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1);
+	if (r) {
+		dev_err(adev->dev, "reserving fence slot failed (%d)\n", r);
+		goto error_unlock;
+	}
+
+	domain = amdgpu_display_supported_domains(adev, rbo->flags);
+
+	r = amdgpu_bo_pin(rbo, domain);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
+		goto error_unlock;
+	}
+
+	r = amdgpu_ttm_alloc_gart(&rbo->tbo);
+	if (unlikely(r != 0)) {
+		DRM_ERROR("%p bind failed\n", rbo);
+		goto error_unpin;
+	}
+
+	amdgpu_bo_unreserve(rbo);
+
+	afb->address = amdgpu_bo_gpu_offset(rbo);
+
+	amdgpu_bo_ref(rbo);
+
+	return 0;
+
+error_unpin:
+	amdgpu_bo_unpin(rbo);
+
+error_unlock:
+	amdgpu_bo_unreserve(rbo);
+	return r;
+}
+
+static void amdgpu_dm_wb_cleanup_job(struct drm_writeback_connector *connector,
+				struct drm_writeback_job *job)
+{
+	struct amdgpu_bo *rbo;
+	int r;
+
+	if (!job->fb)
+		return;
+
+	rbo = gem_to_amdgpu_bo(job->fb->obj[0]);
+	r = amdgpu_bo_reserve(rbo, false);
+	if (unlikely(r)) {
+		DRM_ERROR("failed to reserve rbo before unpin\n");
+		return;
+	}
+
+	amdgpu_bo_unpin(rbo);
+	amdgpu_bo_unreserve(rbo);
+	amdgpu_bo_unref(&rbo);
+}
+
+static const struct drm_encoder_helper_funcs amdgpu_dm_wb_encoder_helper_funcs = {
+	.atomic_check = amdgpu_dm_wb_encoder_atomic_check,
+};
+
+static const struct drm_connector_funcs amdgpu_dm_wb_connector_funcs = {
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = drm_connector_cleanup,
+	.reset = amdgpu_dm_connector_funcs_reset,
+	.atomic_duplicate_state = amdgpu_dm_connector_atomic_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static const struct drm_connector_helper_funcs amdgpu_dm_wb_conn_helper_funcs = {
+	.get_modes = amdgpu_dm_wb_connector_get_modes,
+	.prepare_writeback_job = amdgpu_dm_wb_prepare_job,
+	.cleanup_writeback_job = amdgpu_dm_wb_cleanup_job,
+};
+
+int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm,
+				struct amdgpu_dm_wb_connector *wbcon,
+				uint32_t link_index)
+{
+	struct dc *dc = dm->dc;
+	struct dc_link *link = dc_get_link_at_index(dc, link_index);
+	int res = 0;
+
+	wbcon->link = link;
+
+	drm_connector_helper_add(&wbcon->base.base, &amdgpu_dm_wb_conn_helper_funcs);
+
+	res = drm_writeback_connector_init(&dm->adev->ddev, &wbcon->base,
+					    &amdgpu_dm_wb_connector_funcs,
+					    &amdgpu_dm_wb_encoder_helper_funcs,
+					    amdgpu_dm_wb_formats,
+					    ARRAY_SIZE(amdgpu_dm_wb_formats),
+					    amdgpu_dm_get_encoder_crtc_mask(dm->adev));
+
+	if (res)
+		return res;
+	/*
+	 * Some of the properties below require access to state, like bpc.
+	 * Allocate some default initial connector state with our reset helper.
+	 */
+	if (wbcon->base.base.funcs->reset)
+		wbcon->base.base.funcs->reset(&wbcon->base.base);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h
new file mode 100644
index 000000000000..13d31c857dee
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_DM_WB_H__
+#define __AMDGPU_DM_WB_H__
+
+#include <drm/drm_writeback.h>
+
+int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm,
+				struct amdgpu_dm_wb_connector *dm_wbcon,
+				uint32_t link_index);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile
index 3a169b78e7e4..7991ae468f75 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -22,7 +22,7 @@
 #
 # Makefile for Display Core (dc) component.
 
-DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc
+DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc resource optc
 
 ifdef CONFIG_DRM_AMD_DC_FP
 
@@ -34,12 +34,8 @@ DC_LIBS += dcn21
 DC_LIBS += dcn201
 DC_LIBS += dcn30
 DC_LIBS += dcn301
-DC_LIBS += dcn302
-DC_LIBS += dcn303
 DC_LIBS += dcn31
 DC_LIBS += dcn314
-DC_LIBS += dcn315
-DC_LIBS += dcn316
 DC_LIBS += dcn32
 DC_LIBS += dcn321
 DC_LIBS += dcn35
@@ -51,7 +47,6 @@ DC_LIBS += dce120
 
 DC_LIBS += dce112
 DC_LIBS += dce110
-DC_LIBS += dce100
 DC_LIBS += dce80
 
 ifdef CONFIG_DRM_AMD_DC_SI
@@ -65,7 +60,7 @@ AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LI
 include $(AMD_DC)
 
 DISPLAY_CORE = dc.o dc_stat.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
-dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o
+dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o dc_state.o
 
 DISPLAY_CORE += dc_vm_helper.o
 
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
index e295a839ab47..1090d235086a 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
@@ -103,7 +103,8 @@ void convert_float_matrix(
 
 static uint32_t find_gcd(uint32_t a, uint32_t b)
 {
-	uint32_t remainder = 0;
+	uint32_t remainder;
+
 	while (b != 0) {
 		remainder = a % b;
 		a = b;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 7cdb1a8a0ba0..960c4b4f6ddf 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1014,13 +1014,20 @@ static enum bp_result get_ss_info_v4_5(
 		DC_LOG_BIOS("AS_SIGNAL_TYPE_HDMI ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
 		break;
 	case AS_SIGNAL_TYPE_DISPLAY_PORT:
-		ss_info->spread_spectrum_percentage =
+		if (bp->base.integrated_info) {
+			DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", bp->base.integrated_info->gpuclk_ss_percentage);
+			ss_info->spread_spectrum_percentage =
+					bp->base.integrated_info->gpuclk_ss_percentage;
+			ss_info->type.CENTER_MODE =
+					bp->base.integrated_info->gpuclk_ss_type;
+		} else {
+			ss_info->spread_spectrum_percentage =
 				disp_cntl_tbl->dp_ss_percentage;
-		ss_info->spread_spectrum_range =
+			ss_info->spread_spectrum_range =
 				disp_cntl_tbl->dp_ss_rate_10hz * 10;
-		if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
-			ss_info->type.CENTER_MODE = true;
-
+			if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
+				ss_info->type.CENTER_MODE = true;
+		}
 		DC_LOG_BIOS("AS_SIGNAL_TYPE_DISPLAY_PORT ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
 		break;
 	case AS_SIGNAL_TYPE_GPU_PLL:
@@ -1691,7 +1698,7 @@ static enum bp_result bios_parser_enable_disp_power_gating(
 static enum bp_result bios_parser_enable_lvtma_control(
 	struct dc_bios *dcb,
 	uint8_t uc_pwr_on,
-	uint8_t panel_instance,
+	uint8_t pwrseq_instance,
 	uint8_t bypass_panel_control_wait)
 {
 	struct bios_parser *bp = BP_FROM_DCB(dcb);
@@ -1699,7 +1706,7 @@ static enum bp_result bios_parser_enable_lvtma_control(
 	if (!bp->cmd_tbl.enable_lvtma_control)
 		return BP_RESULT_FAILURE;
 
-	return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, panel_instance, bypass_panel_control_wait);
+	return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, pwrseq_instance, bypass_panel_control_wait);
 }
 
 static bool bios_parser_is_accelerated_mode(
@@ -2214,22 +2221,22 @@ static enum bp_result bios_parser_get_disp_connector_caps_info(
 
 	switch (bp->object_info_tbl.revision.minor) {
 	case 4:
-	    default:
-		    object = get_bios_object(bp, object_id);
-
-		    if (!object)
-			    return BP_RESULT_BADINPUT;
-
-		    record = get_disp_connector_caps_record(bp, object);
-		    if (!record)
-			    return BP_RESULT_NORECORD;
-
-		    info->INTERNAL_DISPLAY =
-			    (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY) ? 1 : 0;
-		    info->INTERNAL_DISPLAY_BL =
-			    (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL) ? 1 : 0;
-		    break;
-	    case 5:
+		default:
+			object = get_bios_object(bp, object_id);
+
+			if (!object)
+				return BP_RESULT_BADINPUT;
+
+			record = get_disp_connector_caps_record(bp, object);
+			if (!record)
+				return BP_RESULT_NORECORD;
+
+			info->INTERNAL_DISPLAY =
+				(record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY) ? 1 : 0;
+			info->INTERNAL_DISPLAY_BL =
+				(record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL) ? 1 : 0;
+			break;
+	case 5:
 		object_path_v3 = get_bios_object_from_path_v3(bp, object_id);
 
 		if (!object_path_v3)
@@ -2391,7 +2398,6 @@ static enum bp_result get_vram_info_v30(
 	return result;
 }
 
-
 /*
  * get_integrated_info_v11
  *
@@ -2814,6 +2820,8 @@ static enum bp_result get_integrated_info_v2_2(
 	info->ma_channel_number = info_v2_2->umachannelnumber;
 	info->dp_ss_control =
 		le16_to_cpu(info_v2_2->reserved1);
+	info->gpuclk_ss_percentage = info_v2_2->gpuclk_ss_percentage;
+	info->gpuclk_ss_type = info_v2_2->gpuclk_ss_type;
 
 	for (i = 0; i < NUMBER_OF_UCHAR_FOR_GUID; ++i) {
 		info->ext_disp_conn_info.gu_id[i] =
@@ -3323,27 +3331,28 @@ static enum bp_result get_bracket_layout_record(
 		DC_LOG_DETECTION_EDID_PARSER("Invalid slot_layout_info\n");
 		return BP_RESULT_BADINPUT;
 	}
+
 	tbl = &bp->object_info_tbl;
 	v1_4 = tbl->v1_4;
 	v1_5 = tbl->v1_5;
 
 	result = BP_RESULT_NORECORD;
 	switch (bp->object_info_tbl.revision.minor) {
-		case 4:
-		default:
-			for (i = 0; i < v1_4->number_of_path; ++i)	{
-				if (bracket_layout_id ==
-					v1_4->display_path[i].display_objid) {
-					result = update_slot_layout_info(dcb, i, slot_layout_info);
-					break;
-				}
+	case 4:
+	default:
+		for (i = 0; i < v1_4->number_of_path; ++i) {
+			if (bracket_layout_id == v1_4->display_path[i].display_objid) {
+				result = update_slot_layout_info(dcb, i, slot_layout_info);
+				break;
 			}
-		    break;
-		case 5:
-			for (i = 0; i < v1_5->number_of_path; ++i)
-				result = update_slot_layout_info_v2(dcb, i, slot_layout_info);
-			break;
+		}
+		break;
+	case 5:
+		for (i = 0; i < v1_5->number_of_path; ++i)
+			result = update_slot_layout_info_v2(dcb, i, slot_layout_info);
+		break;
 	}
+
 	return result;
 }
 
@@ -3352,9 +3361,7 @@ static enum bp_result bios_get_board_layout_info(
 	struct board_layout_info *board_layout_info)
 {
 	unsigned int i;
-
 	struct bios_parser *bp;
-
 	static enum bp_result record_result;
 	unsigned int max_slots;
 
@@ -3364,7 +3371,6 @@ static enum bp_result bios_get_board_layout_info(
 		0, 0
 	};
 
-
 	bp = BP_FROM_DCB(dcb);
 
 	if (board_layout_info == NULL) {
@@ -3545,7 +3551,6 @@ static const struct dc_vbios_funcs vbios_funcs = {
 	.bios_parser_destroy = firmware_parser_destroy,
 
 	.get_board_layout_info = bios_get_board_layout_info,
-	/* TODO: use this fn in hw init?*/
 	.pack_data_tables = bios_parser_pack_data_tables,
 
 	.get_atom_dc_golden_table = bios_get_atom_dc_golden_table,
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index 90a02d7bd3da..293a919d605d 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -123,7 +123,7 @@ static void encoder_control_dmcub(
 		sizeof(cmd.digx_encoder_control.header);
 	cmd.digx_encoder_control.encoder_control.dig.stream_param = *dig;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result encoder_control_digx_v1_5(
@@ -259,7 +259,7 @@ static void transmitter_control_dmcub(
 		sizeof(cmd.dig1_transmitter_control.header);
 	cmd.dig1_transmitter_control.transmitter_control.dig = *dig;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result transmitter_control_v1_6(
@@ -321,7 +321,7 @@ static void transmitter_control_dmcub_v1_7(
 		sizeof(cmd.dig1_transmitter_control.header);
 	cmd.dig1_transmitter_control.transmitter_control.dig_v1_7 = *dig;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result transmitter_control_v1_7(
@@ -429,7 +429,7 @@ static void set_pixel_clock_dmcub(
 		sizeof(cmd.set_pixel_clock.header);
 	cmd.set_pixel_clock.pixel_clock.clk = *clk;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result set_pixel_clock_v7(
@@ -796,7 +796,7 @@ static void enable_disp_power_gating_dmcub(
 		sizeof(cmd.enable_disp_power_gating.header);
 	cmd.enable_disp_power_gating.power_gating.pwr = *pwr;
 
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result enable_disp_power_gating_v2_1(
@@ -976,7 +976,7 @@ static unsigned int get_smu_clock_info_v3_1(struct bios_parser *bp, uint8_t id)
 static enum bp_result enable_lvtma_control(
 	struct bios_parser *bp,
 	uint8_t uc_pwr_on,
-	uint8_t panel_instance,
+	uint8_t pwrseq_instance,
 	uint8_t bypass_panel_control_wait);
 
 static void init_enable_lvtma_control(struct bios_parser *bp)
@@ -989,7 +989,7 @@ static void init_enable_lvtma_control(struct bios_parser *bp)
 static void enable_lvtma_control_dmcub(
 	struct dc_dmub_srv *dmcub,
 	uint8_t uc_pwr_on,
-	uint8_t panel_instance,
+	uint8_t pwrseq_instance,
 	uint8_t bypass_panel_control_wait)
 {
 
@@ -1002,17 +1002,17 @@ static void enable_lvtma_control_dmcub(
 			DMUB_CMD__VBIOS_LVTMA_CONTROL;
 	cmd.lvtma_control.data.uc_pwr_action =
 			uc_pwr_on;
-	cmd.lvtma_control.data.panel_inst =
-			panel_instance;
+	cmd.lvtma_control.data.pwrseq_inst =
+			pwrseq_instance;
 	cmd.lvtma_control.data.bypass_panel_control_wait =
 			bypass_panel_control_wait;
-	dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static enum bp_result enable_lvtma_control(
 	struct bios_parser *bp,
 	uint8_t uc_pwr_on,
-	uint8_t panel_instance,
+	uint8_t pwrseq_instance,
 	uint8_t bypass_panel_control_wait)
 {
 	enum bp_result result = BP_RESULT_FAILURE;
@@ -1021,7 +1021,7 @@ static enum bp_result enable_lvtma_control(
 	    bp->base.ctx->dc->debug.dmub_command_table) {
 		enable_lvtma_control_dmcub(bp->base.ctx->dmub_srv,
 				uc_pwr_on,
-				panel_instance,
+				pwrseq_instance,
 				bypass_panel_control_wait);
 		return BP_RESULT_OK;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
index b6d09bf6cf72..41c8c014397f 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
@@ -96,7 +96,7 @@ struct cmd_tbl {
 			struct bios_parser *bp, uint8_t id);
 	enum bp_result (*enable_lvtma_control)(struct bios_parser *bp,
 			uint8_t uc_pwr_on,
-			uint8_t panel_instance,
+			uint8_t pwrseq_instance,
 			uint8_t bypass_panel_control_wait);
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index 3e73c4e59d40..28a2a837d2f0 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -29,6 +29,7 @@
 #include "dc_types.h"
 #include "dccg.h"
 #include "clk_mgr_internal.h"
+#include "dc_state_priv.h"
 #include "link.h"
 
 #include "dce100/dce_clk_mgr.h"
@@ -63,7 +64,7 @@ int clk_mgr_helper_get_active_display_cnt(
 		/* Don't count SubVP phantom pipes as part of active
 		 * display count
 		 */
-		if (stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM)
 			continue;
 
 		/*
@@ -368,7 +369,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
 	}
 	break;
 
-#endif /* CONFIG_DRM_AMD_DC_FP - Family RV */
+#endif	/* CONFIG_DRM_AMD_DC_FP */
 	default:
 		ASSERT(0); /* Unknown Asic */
 		break;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
index 3db4ef564b99..ce1386e22576 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
@@ -253,7 +253,7 @@ void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
index 7326b7565846..757528256326 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -284,7 +284,7 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
index b2c4f97afc8b..644da4637320 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
@@ -232,7 +232,7 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static void dcn315_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
@@ -334,7 +334,7 @@ static struct wm_table lpddr5_wm_table = {
 		{
 			.wm_inst = WM_A,
 			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.65333,
+			.pstate_latency_us = 129.0,
 			.sr_exit_time_us = 11.5,
 			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
@@ -342,7 +342,7 @@ static struct wm_table lpddr5_wm_table = {
 		{
 			.wm_inst = WM_B,
 			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.65333,
+			.pstate_latency_us = 129.0,
 			.sr_exit_time_us = 11.5,
 			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
@@ -350,7 +350,7 @@ static struct wm_table lpddr5_wm_table = {
 		{
 			.wm_inst = WM_C,
 			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.65333,
+			.pstate_latency_us = 129.0,
 			.sr_exit_time_us = 11.5,
 			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
@@ -358,7 +358,7 @@ static struct wm_table lpddr5_wm_table = {
 		{
 			.wm_inst = WM_D,
 			.wm_type = WM_TYPE_PSTATE_CHG,
-			.pstate_latency_us = 11.65333,
+			.pstate_latency_us = 129.0,
 			.sr_exit_time_us = 11.5,
 			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
index 09151cc56ce4..12f3e8aa46d8 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
@@ -239,7 +239,7 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static void dcn316_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index a496930b1f9c..aadd07bc68c5 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -25,7 +25,6 @@
 
 #include "dccg.h"
 #include "clk_mgr_internal.h"
-
 #include "dcn32/dcn32_clk_mgr_smu_msg.h"
 #include "dcn20/dcn20_clk_mgr.h"
 #include "dce100/dce_clk_mgr.h"
@@ -34,7 +33,7 @@
 #include "core_types.h"
 #include "dm_helpers.h"
 #include "link.h"
-
+#include "dc_state_priv.h"
 #include "atomfirmware.h"
 #include "smu13_driver_if.h"
 
@@ -458,20 +457,56 @@ static int dcn32_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base)
 	return 0;
 }
 
-static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr_internal *clk_mgr)
+static bool dcn32_check_native_scaling(struct pipe_ctx *pipe)
 {
-    unsigned int dispclk_khz_reg    = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK
-    unsigned int dppclk_khz_reg     = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK
-    unsigned int dprefclk_khz_reg   = REG_READ(CLK1_CLK2_CURRENT_CNT); // DPREFCLK
-    unsigned int dcfclk_khz_reg     = REG_READ(CLK1_CLK3_CURRENT_CNT); // DCFCLK
-    unsigned int dtbclk_khz_reg     = REG_READ(CLK1_CLK4_CURRENT_CNT); // DTBCLK
-    unsigned int fclk_khz_reg       = REG_READ(CLK4_CLK0_CURRENT_CNT); // FCLK
+	bool is_native_scaling = false;
+	int width = pipe->plane_state->src_rect.width;
+	int height = pipe->plane_state->src_rect.height;
+
+	if (pipe->stream->timing.h_addressable == width &&
+			pipe->stream->timing.v_addressable == height &&
+			pipe->plane_state->dst_rect.width == width &&
+			pipe->plane_state->dst_rect.height == height)
+		is_native_scaling = true;
+
+	return is_native_scaling;
+}
+
+static void dcn32_auto_dpm_test_log(
+		struct dc_clocks *new_clocks,
+		struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context)
+{
+	unsigned int dispclk_khz_reg, dppclk_khz_reg, dprefclk_khz_reg, dcfclk_khz_reg, dtbclk_khz_reg,
+				 fclk_khz_reg, mall_ss_size_bytes;
+	int dramclk_khz_override, fclk_khz_override, num_fclk_levels;
+
+	struct pipe_ctx *pipe_ctx_list[MAX_PIPES];
+	int active_pipe_count = 0;
+
+	for (int i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
+			pipe_ctx_list[active_pipe_count] = pipe_ctx;
+			active_pipe_count++;
+		}
+	}
+
+	mall_ss_size_bytes = context->bw_ctx.bw.dcn.mall_ss_size_bytes;
+
+    dispclk_khz_reg    = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK
+    dppclk_khz_reg     = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK
+    dprefclk_khz_reg   = REG_READ(CLK1_CLK2_CURRENT_CNT); // DPREFCLK
+    dcfclk_khz_reg     = REG_READ(CLK1_CLK3_CURRENT_CNT); // DCFCLK
+    dtbclk_khz_reg     = REG_READ(CLK1_CLK4_CURRENT_CNT); // DTBCLK
+    fclk_khz_reg       = REG_READ(CLK4_CLK0_CURRENT_CNT); // FCLK
 
     // Overrides for these clocks in case there is no p_state change support
-    int dramclk_khz_override = new_clocks->dramclk_khz;
-    int fclk_khz_override = new_clocks->fclk_khz;
+    dramclk_khz_override = new_clocks->dramclk_khz;
+    fclk_khz_override = new_clocks->fclk_khz;
 
-    int num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1;
+    num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1;
 
     if (!new_clocks->p_state_change_support) {
 	    dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 1000;
@@ -488,16 +523,49 @@ static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr
 	//
 	//				AutoDPMTest: clk1:%d - clk2:%d - clk3:%d - clk4:%d\n"
 	////////////////////////////////////////////////////////////////////////////
-	if (new_clocks &&
+	if (new_clocks && active_pipe_count > 0 &&
 		new_clocks->dramclk_khz > 0 &&
 		new_clocks->fclk_khz > 0 &&
 		new_clocks->dcfclk_khz > 0 &&
 		new_clocks->dppclk_khz > 0) {
 
+		uint32_t pix_clk_list[MAX_PIPES] = {0};
+		int p_state_list[MAX_PIPES] = {0};
+		int disp_src_width_list[MAX_PIPES] = {0};
+		int disp_src_height_list[MAX_PIPES] = {0};
+		uint64_t disp_src_refresh_list[MAX_PIPES] = {0};
+		bool is_scaled_list[MAX_PIPES] = {0};
+
+		for (int i = 0; i < active_pipe_count; i++) {
+			struct pipe_ctx *curr_pipe_ctx = pipe_ctx_list[i];
+			uint64_t refresh_rate;
+
+			pix_clk_list[i] = curr_pipe_ctx->stream->timing.pix_clk_100hz;
+			p_state_list[i] = curr_pipe_ctx->p_state_type;
+
+			refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 +
+				curr_pipe_ctx->stream->timing.v_total * curr_pipe_ctx->stream->timing.h_total - (uint64_t)1);
+			refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total);
+			refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total);
+			disp_src_refresh_list[i] = refresh_rate;
+
+			if (curr_pipe_ctx->plane_state) {
+				is_scaled_list[i] = !(dcn32_check_native_scaling(curr_pipe_ctx));
+				disp_src_width_list[i] = curr_pipe_ctx->plane_state->src_rect.width;
+				disp_src_height_list[i] = curr_pipe_ctx->plane_state->src_rect.height;
+			}
+		}
+
 		DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - "
 			"dcfclk:%d - dppclk:%d - dispclk_hw:%d - "
 			"dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - "
-			"dtbclk_hw:%d - fclk_hw:%d\n",
+			"dtbclk_hw:%d - fclk_hw:%d - pix_clk_0:%d - pix_clk_1:%d - "
+			"pix_clk_2:%d - pix_clk_3:%d - mall_ss_size:%d - p_state_type_0:%d - "
+			"p_state_type_1:%d - p_state_type_2:%d - p_state_type_3:%d - "
+			"pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - "
+			"pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - "
+			"pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - "
+			"pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d - LOG_END\n",
 			dramclk_khz_override,
 			fclk_khz_override,
 			new_clocks->dcfclk_khz,
@@ -507,7 +575,14 @@ static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr
 			dprefclk_khz_reg,
 			dcfclk_khz_reg,
 			dtbclk_khz_reg,
-			fclk_khz_reg);
+			fclk_khz_reg,
+			pix_clk_list[0], pix_clk_list[1], pix_clk_list[3], pix_clk_list[2],
+			mall_ss_size_bytes,
+			p_state_list[0], p_state_list[1], p_state_list[2], p_state_list[3],
+			disp_src_width_list[0], disp_src_height_list[0], disp_src_refresh_list[0], is_scaled_list[0],
+			disp_src_width_list[1], disp_src_height_list[1], disp_src_refresh_list[1], is_scaled_list[1],
+			disp_src_width_list[2], disp_src_height_list[2], disp_src_refresh_list[2], is_scaled_list[2],
+			disp_src_width_list[3], disp_src_height_list[3], disp_src_refresh_list[3], is_scaled_list[3]);
 	}
 }
 
@@ -680,6 +755,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
 		/* DCCG requires KHz precision for DTBCLK */
 		clk_mgr_base->clks.ref_dtbclk_khz =
 				dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz));
+
 		dcn32_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
 	}
 
@@ -708,7 +784,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
 				clk_mgr_base->clks.dispclk_khz / 1000 / 7);
 
 	if (dc->config.enable_auto_dpm_test_logs) {
-	    dcn32_auto_dpm_test_log(new_clocks, clk_mgr);
+	    dcn32_auto_dpm_test_log(new_clocks, clk_mgr, context);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 507a7cf56711..9c660d1facc7 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -50,6 +50,7 @@
 #include "dc_dmub_srv.h"
 #include "link.h"
 #include "logger_types.h"
+
 #undef DC_LOGGER
 #define DC_LOGGER \
 	clk_mgr->base.base.ctx->logger
@@ -80,12 +81,12 @@
 
 static int dcn35_get_active_display_cnt_wa(
 		struct dc *dc,
-		struct dc_state *context)
+		struct dc_state *context,
+		int *all_active_disps)
 {
-	int i, display_count;
+	int i, display_count = 0;
 	bool tmds_present = false;
 
-	display_count = 0;
 	for (i = 0; i < context->stream_count; i++) {
 		const struct dc_stream_state *stream = context->streams[i];
 
@@ -103,7 +104,8 @@ static int dcn35_get_active_display_cnt_wa(
 				link->link_enc->funcs->is_dig_enabled(link->link_enc))
 			display_count++;
 	}
-
+	if (all_active_disps != NULL)
+		*all_active_disps = display_count;
 	/* WA for hang on HDMI after display off back on*/
 	if (display_count == 0 && tmds_present)
 		display_count = 1;
@@ -126,21 +128,13 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *
 			continue;
 		if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) ||
 				     !pipe->stream->link_enc)) {
-			struct stream_encoder *stream_enc = pipe->stream_res.stream_enc;
-
 			if (disable) {
-				if (stream_enc && stream_enc->funcs->disable_fifo)
-					pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc);
-
 				if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
 					pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
 
 				reset_sync_context_for_pipe(dc, context, i);
 			} else {
 				pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
-
-				if (stream_enc && stream_enc->funcs->enable_fifo)
-					pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc);
 			}
 		}
 	}
@@ -224,14 +218,19 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
 	struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
 	struct dc *dc = clk_mgr_base->ctx->dc;
-	int display_count;
+	int display_count = 0;
 	bool update_dppclk = false;
 	bool update_dispclk = false;
 	bool dpp_clock_lowered = false;
+	int all_active_disps = 0;
 
 	if (dc->work_arounds.skip_clock_update)
 		return;
 
+	display_count = dcn35_get_active_display_cnt_wa(dc, context, &all_active_disps);
+	if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz)
+		new_clocks->ref_dtbclk_khz = 600000;
+
 	/*
 	 * if it is safe to lower, but we are already in the lower state, we don't have to do anything
 	 * also if safe to lower is false, we just go in the higher state
@@ -250,7 +249,6 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 		}
 		/* check that we're not already in lower */
 		if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
-			display_count = dcn35_get_active_display_cnt_wa(dc, context);
 			/* if we can go lower, go lower */
 			if (display_count == 0)
 				clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
@@ -265,8 +263,10 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 
 		if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
 			dcn35_smu_set_dtbclk(clk_mgr, true);
-			dcn35_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
 			clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+
+			dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz);
+			clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
 		}
 
 		/* check that we're not already in D0 */
@@ -314,17 +314,12 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 		update_dispclk = true;
 	}
 
-	if (!new_clocks->dtbclk_en) {
-		new_clocks->ref_dtbclk_khz = 600000;
-	}
-
 	/* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */
 	if (!dc->debug.disable_dtb_ref_clk_switch &&
-			should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000)) {
-		/* DCCG requires KHz precision for DTBCLK */
-		dcn35_smu_set_dtbclk(clk_mgr, true);
-
-		dcn35_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
+	    should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000,
+			     clk_mgr_base->clks.ref_dtbclk_khz / 1000)) {
+		dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz);
+		clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
 	}
 
 	if (dpp_clock_lowered) {
@@ -348,7 +343,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
 	cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
 	cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
@@ -423,9 +418,8 @@ bool dcn35_are_clock_states_equal(struct dc_clocks *a,
 }
 
 static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
-		struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info)
+		struct clk_mgr_dcn35 *clk_mgr)
 {
-
 }
 
 static struct clk_bw_params dcn35_bw_params = {
@@ -443,32 +437,32 @@ static struct wm_table ddr5_wm_table = {
 			.wm_inst = WM_A,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_B,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_C,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_D,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 	}
@@ -480,32 +474,32 @@ static struct wm_table lpddr5_wm_table = {
 			.wm_inst = WM_A,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_B,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_C,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_D,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 14.0,
+			.sr_enter_plus_exit_time_us = 16.0,
 			.valid = true,
 		},
 	}
@@ -515,11 +509,6 @@ static DpmClocks_t_dcn35 dummy_clocks;
 
 static struct dcn35_watermarks dummy_wms = { 0 };
 
-static struct dcn35_ss_info_table ss_info_table = {
-	.ss_divider = 1000,
-	.ss_percentage = {0, 0, 375, 375, 375}
-};
-
 static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table)
 {
 	int i, num_valid_sets;
@@ -653,27 +642,47 @@ static unsigned int convert_wck_ratio(uint8_t wck_ratio)
 	return 1;
 }
 
+static inline uint32_t calc_dram_speed_mts(const MemPstateTable_t *entry)
+{
+	return entry->UClk * convert_wck_ratio(entry->WckRatio) * 2;
+}
+
 static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr,
 						    struct integrated_info *bios_info,
 						    DpmClocks_t_dcn35 *clock_table)
 {
 	struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
 	struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1];
-	uint32_t max_pstate = 0,  max_uclk = 0, max_fclk = 0;
-	uint32_t min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
+	uint32_t max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
+	uint32_t max_pstate = 0, max_dram_speed_mts = 0, min_dram_speed_mts = 0;
 	int i;
 
+	/* Determine min/max p-state values. */
 	for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) {
-		if (is_valid_clock_value(clock_table->MemPstateTable[i].UClk) &&
-		    clock_table->MemPstateTable[i].UClk > max_uclk) {
-			max_uclk = clock_table->MemPstateTable[i].UClk;
+		uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
+
+		if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts > max_dram_speed_mts) {
+			max_dram_speed_mts = dram_speed_mts;
 			max_pstate = i;
 		}
 	}
 
-	/* We expect the table to contain at least one valid Uclk entry. */
-	ASSERT(is_valid_clock_value(max_uclk));
+	min_dram_speed_mts = max_dram_speed_mts;
+	min_pstate = max_pstate;
 
+	for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) {
+		uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
+
+		if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts < min_dram_speed_mts) {
+			min_dram_speed_mts = dram_speed_mts;
+			min_pstate = i;
+		}
+	}
+
+	/* We expect the table to contain at least one valid P-state entry. */
+	ASSERT(clock_table->NumMemPstatesEnabled &&
+	       is_valid_clock_value(max_dram_speed_mts) &&
+	       is_valid_clock_value(min_dram_speed_mts));
 
 	/* dispclk and dppclk can be max at any voltage, same number of levels for both */
 	if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS &&
@@ -683,47 +692,46 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
 		max_dppclk = find_max_clk_value(clock_table->DppClocks,
 			clock_table->NumDispClkLevelsEnabled);
 	} else {
+		/* Invalid number of entries in the table from PMFW. */
 		ASSERT(0);
 	}
-	if (clock_table->NumFclkLevelsEnabled <= NUM_FCLK_DPM_LEVELS)
-		max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq,
-			clock_table->NumFclkLevelsEnabled);
 
-	for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) {
-		uint32_t min_uclk = clock_table->MemPstateTable[0].UClk;
-		int j;
+	/* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */
+	ASSERT(clock_table->NumDcfClkLevelsEnabled > 0);
 
-		for (j = 1; j < clock_table->NumMemPstatesEnabled; j++) {
-			if (is_valid_clock_value(clock_table->MemPstateTable[j].UClk) &&
-			    clock_table->MemPstateTable[j].UClk < min_uclk &&
-			    clock_table->MemPstateTable[j].Voltage <= clock_table->SocVoltage[i]) {
-				min_uclk = clock_table->MemPstateTable[j].UClk;
-				min_pstate = j;
-			}
-		}
+	max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, clock_table->NumFclkLevelsEnabled);
 
+	for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) {
+		int j;
+
+		/* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
 		for (j = bw_params->clk_table.num_entries - 1; j > 0; j--)
 			if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i])
-			break;
+				break;
 
 		bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz;
 		bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz;
 		bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz;
-		bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
+
+		/* Now update clocks we do read */
 		bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[min_pstate].MemClk;
 		bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[min_pstate].Voltage;
 		bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i];
 		bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i];
 		bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
 		bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
-		bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
-			clock_table->MemPstateTable[min_pstate].WckRatio);
-		}
+		bw_params->clk_table.entries[i].wck_ratio =
+			convert_wck_ratio(clock_table->MemPstateTable[min_pstate].WckRatio);
+
+		/* Dcfclk and Fclk are tied, but at a different ratio */
+		bw_params->clk_table.entries[i].fclk_mhz = min(max_fclk, 2 * clock_table->DcfClocks[i]);
+	}
 
 	/* Make sure to include at least one entry at highest pstate */
 	if (max_pstate != min_pstate || i == 0) {
 		if (i > MAX_NUM_DPM_LVL - 1)
 			i = MAX_NUM_DPM_LVL - 1;
+
 		bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
 		bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[max_pstate].MemClk;
 		bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[max_pstate].Voltage;
@@ -739,6 +747,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
 	}
 	bw_params->clk_table.num_entries = i--;
 
+	/* Make sure all highest clocks are included*/
 	bw_params->clk_table.entries[i].socclk_mhz =
 		find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
 	bw_params->clk_table.entries[i].dispclk_mhz =
@@ -757,6 +766,11 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
 	bw_params->clk_table.num_entries_per_clk.num_fclk_levels = clock_table->NumFclkLevelsEnabled;
 	bw_params->clk_table.num_entries_per_clk.num_memclk_levels = clock_table->NumMemPstatesEnabled;
 	bw_params->clk_table.num_entries_per_clk.num_socclk_levels = clock_table->NumSocClkLevelsEnabled;
+
+	/*
+	 * Set any 0 clocks to max default setting. Not an issue for
+	 * power since we aren't doing switching in such case anyway
+	 */
 	for (i = 0; i < bw_params->clk_table.num_entries; i++) {
 		if (!bw_params->clk_table.entries[i].fclk_mhz) {
 			bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
@@ -805,7 +819,7 @@ static void dcn35_set_low_power_state(struct clk_mgr *clk_mgr_base)
 	struct dc_state *context = dc->current_state;
 
 	if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
-		display_count = dcn35_get_active_display_cnt_wa(dc, context);
+		display_count = dcn35_get_active_display_cnt_wa(dc, context, NULL);
 		/* if we can go lower, go lower */
 		if (display_count == 0)
 			clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
@@ -965,21 +979,6 @@ struct clk_mgr_funcs dcn35_fpga_funcs = {
 	.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
 };
 
-static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
-{
-	uint32_t clock_source;
-	struct dc_context *ctx = clk_mgr->base.ctx;
-
-	REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
-
-	clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
-
-	if (clk_mgr->dprefclk_ss_percentage != 0) {
-		clk_mgr->ss_on_dprefclk = true;
-		clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
-	}
-}
-
 void dcn35_clk_mgr_construct(
 		struct dc_context *ctx,
 		struct clk_mgr_dcn35 *clk_mgr,
@@ -987,7 +986,6 @@ void dcn35_clk_mgr_construct(
 		struct dccg *dccg)
 {
 	struct dcn35_smu_dpm_clks smu_dpm_clks = { 0 };
-	struct clk_log_info log_info = {0};
 	clk_mgr->base.base.ctx = ctx;
 	clk_mgr->base.base.funcs = &dcn35_funcs;
 
@@ -1040,20 +1038,14 @@ void dcn35_clk_mgr_construct(
 		dcn35_bw_params.wm_table = ddr5_wm_table;
 	}
 	/* Saved clocks configured at boot for debug purposes */
-	dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info);
+	dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr);
 
 	clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base);
-	clk_mgr->base.base.clks.ref_dtbclk_khz = dcn35_smu_get_dtbclk(&clk_mgr->base);
-
-	if (!clk_mgr->base.base.clks.ref_dtbclk_khz)
-		dcn35_smu_set_dtbclk(&clk_mgr->base, true);
+	clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
 
-	clk_mgr->base.base.clks.dtbclk_en = true;
 	dce_clock_read_ss_info(&clk_mgr->base);
 	/*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/
 
-	dcn35_read_ss_info_from_lut(&clk_mgr->base);
-
 	clk_mgr->base.base.bw_params = &dcn35_bw_params;
 
 	if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
@@ -1129,7 +1121,6 @@ void dcn35_clk_mgr_construct(
 			ctx->dc->debug.disable_dpp_power_gate = false;
 			ctx->dc->debug.disable_hubp_power_gate = false;
 			ctx->dc->debug.disable_dsc_power_gate = false;
-			ctx->dc->debug.disable_hpo_power_gate = false;
 		} else {
 			/*let's reset the config control flag*/
 			ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
index b6b8c3ca1572..6d4a1ffab5ed 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
@@ -116,6 +116,9 @@ static uint32_t dcn35_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un
 			msleep(delay_us/1000);
 		else if (delay_us > 0)
 			udelay(delay_us);
+
+		if (clk_mgr->base.ctx->dc->debug.disable_timeout)
+			max_retries++;
 	} while (max_retries--);
 
 	return res_val;
@@ -276,7 +279,7 @@ void dcn35_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, u
 		clk_mgr,
 		VBIOSSMC_MSG_SetDisplayIdleOptimizations,
 		idle_info);
-	smu_print("VBIOSSMC_MSG_SetDisplayIdleOptimizations idle_info  = %d\n", idle_info);
+	smu_print("%s: VBIOSSMC_MSG_SetDisplayIdleOptimizations idle_info  = %x\n", __func__, idle_info);
 }
 
 void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
@@ -295,7 +298,7 @@ void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool e
 			clk_mgr,
 			VBIOSSMC_MSG_SetDisplayIdleOptimizations,
 			idle_info.data);
-	smu_print("dcn35_smu_enable_phy_refclk_pwrdwn  = %d\n", enable ? 1 : 0);
+	smu_print("%s smu_enable_phy_refclk_pwrdwn  = %d\n", __func__, enable ? 1 : 0);
 }
 
 void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
@@ -307,6 +310,7 @@ void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
 			clk_mgr,
 			VBIOSSMC_MSG_UpdatePmeRestore,
 			0);
+	smu_print("%s: SMC_MSG_UpdatePmeRestore\n", __func__);
 }
 
 void dcn35_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high)
@@ -347,7 +351,7 @@ void dcn35_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
 
 void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support)
 {
-	unsigned int msg_id, param;
+	unsigned int msg_id, param, retv;
 
 	if (!clk_mgr->smu_present)
 		return;
@@ -357,27 +361,32 @@ void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst
 	case DCN_ZSTATE_SUPPORT_ALLOW:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 10) | (1 << 9) | (1 << 8);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW, param = %d\n", __func__, param);
 		break;
 
 	case DCN_ZSTATE_SUPPORT_DISALLOW:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = 0;
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg_id = DISALLOW, param = %d\n",  __func__, param);
 		break;
 
 
 	case DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 10);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z10_ONLY, param = %d\n", __func__, param);
 		break;
 
 	case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 10) | (1 << 8);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_Z10_ONLY, param = %d\n", __func__, param);
 		break;
 
 	case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY:
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = (1 << 8);
+		smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_ONLY, param = %d\n", __func__, param);
 		break;
 
 	default: //DCN_ZSTATE_SUPPORT_UNKNOWN
@@ -387,11 +396,11 @@ void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst
 	}
 
 
-	dcn35_smu_send_msg_with_param(
+	retv = dcn35_smu_send_msg_with_param(
 		clk_mgr,
 		msg_id,
 		param);
-	smu_print("dcn35_smu_set_zstate_support msg_id = %d, param = %d\n", msg_id, param);
+	smu_print("%s:  msg_id = %d, param = 0x%x, return = %d\n", __func__, msg_id, param, retv);
 }
 
 int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr)
@@ -405,7 +414,7 @@ int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr)
 						 VBIOSSMC_MSG_GetDprefclkFreq,
 						 0);
 
-	smu_print("dcn35_smu_get_DPREF clk  = %d mhz\n", dprefclk);
+	smu_print("%s:  SMU DPREF clk  = %d mhz\n",  __func__, dprefclk);
 	return dprefclk * 1000;
 }
 
@@ -420,7 +429,7 @@ int dcn35_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr)
 					       VBIOSSMC_MSG_GetDtbclkFreq,
 					       0);
 
-	smu_print("dcn35_smu_get_dtbclk  = %d mhz\n", dtbclk);
+	smu_print("%s: get_dtbclk  = %dmhz\n", __func__, dtbclk);
 	return dtbclk * 1000;
 }
 /* Arg = 1: Turn DTB on; 0: Turn DTB CLK OFF. when it is on, it is 600MHZ */
@@ -433,7 +442,7 @@ void dcn35_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable)
 			clk_mgr,
 			VBIOSSMC_MSG_SetDtbClk,
 			enable);
-	smu_print("dcn35_smu_set_dtbclk  = %d \n", enable ? 1 : 0);
+	smu_print("%s: smu_set_dtbclk = %d\n", __func__, enable ? 1 : 0);
 }
 
 void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
@@ -442,30 +451,45 @@ void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *cl
 			clk_mgr,
 			VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown,
 			enable);
+	smu_print("%s: smu_enable_48mhz_tmdp_refclk_pwrdwn = %d\n", __func__, enable ? 1 : 0);
 }
 
 int dcn35_smu_exit_low_power_state(struct clk_mgr_internal *clk_mgr)
 {
-	return dcn35_smu_send_msg_with_param(
+	int retv;
+
+	retv = dcn35_smu_send_msg_with_param(
 		clk_mgr,
 		VBIOSSMC_MSG_DispPsrExit,
 		0);
+	smu_print("%s: smu_exit_low_power_state return = %d\n", __func__, retv);
+	return retv;
 }
 
 int dcn35_smu_get_ips_supported(struct clk_mgr_internal *clk_mgr)
 {
-	return dcn35_smu_send_msg_with_param(
+	int retv;
+
+	retv = dcn35_smu_send_msg_with_param(
 			clk_mgr,
 			VBIOSSMC_MSG_QueryIPS2Support,
 			0);
+
+	//smu_print("%s: VBIOSSMC_MSG_QueryIPS2Support return = %x\n", __func__, retv);
+	return retv;
 }
 
 void dcn35_smu_write_ips_scratch(struct clk_mgr_internal *clk_mgr, uint32_t param)
 {
 	REG_WRITE(MP1_SMN_C2PMSG_71, param);
+	//smu_print("%s: write_ips_scratch = %x\n", __func__, param);
 }
 
 uint32_t dcn35_smu_read_ips_scratch(struct clk_mgr_internal *clk_mgr)
 {
-	return REG_READ(MP1_SMN_C2PMSG_71);
+	uint32_t retv;
+
+	retv = REG_READ(MP1_SMN_C2PMSG_71);
+	//smu_print("%s: dcn35_smu_read_ips_scratch = %x\n",  __func__, retv);
+	return retv;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 76b47f178127..2d7205058c64 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -34,6 +34,8 @@
 #include "dce/dce_hwseq.h"
 
 #include "resource.h"
+#include "dc_state.h"
+#include "dc_state_priv.h"
 
 #include "gpio_service_interface.h"
 #include "clk_mgr.h"
@@ -409,9 +411,12 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
 	 * avoid conflicting with firmware updates.
 	 */
 	if (dc->ctx->dce_version > DCE_VERSION_MAX)
-		if (dc->optimized_required || dc->wm_optimized_required)
+		if (dc->optimized_required)
 			return false;
 
+	if (!memcmp(&stream->adjust, adjust, sizeof(*adjust)))
+		return true;
+
 	stream->adjust.v_total_max = adjust->v_total_max;
 	stream->adjust.v_total_mid = adjust->v_total_mid;
 	stream->adjust.v_total_mid_frame_num = adjust->v_total_mid_frame_num;
@@ -519,7 +524,7 @@ dc_stream_forward_dmub_crc_window(struct dc_dmub_srv *dmub_srv,
 		cmd.secure_display.roi_info.y_end = rect->y + rect->height;
 	}
 
-	dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 }
 
 static inline void
@@ -808,7 +813,7 @@ static void dc_destruct(struct dc *dc)
 		link_enc_cfg_init(dc, dc->current_state);
 
 	if (dc->current_state) {
-		dc_release_state(dc->current_state);
+		dc_state_release(dc->current_state);
 		dc->current_state = NULL;
 	}
 
@@ -1020,29 +1025,27 @@ static bool dc_construct(struct dc *dc,
 	}
 #endif
 
+	if (!create_links(dc, init_params->num_virtual_links))
+		goto fail;
+
+	/* Create additional DIG link encoder objects if fewer than the platform
+	 * supports were created during link construction.
+	 */
+	if (!create_link_encoders(dc))
+		goto fail;
+
 	/* Creation of current_state must occur after dc->dml
 	 * is initialized in dc_create_resource_pool because
 	 * on creation it copies the contents of dc->dml
 	 */
 
-	dc->current_state = dc_create_state(dc);
+	dc->current_state = dc_state_create(dc);
 
 	if (!dc->current_state) {
 		dm_error("%s: failed to create validate ctx\n", __func__);
 		goto fail;
 	}
 
-	if (!create_links(dc, init_params->num_virtual_links))
-		goto fail;
-
-	/* Create additional DIG link encoder objects if fewer than the platform
-	 * supports were created during link construction.
-	 */
-	if (!create_link_encoders(dc))
-		goto fail;
-
-	dc_resource_state_construct(dc, dc->current_state);
-
 	return true;
 
 fail:
@@ -1085,7 +1088,7 @@ static void apply_ctx_interdependent_lock(struct dc *dc,
 	}
 }
 
-static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx)
+static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx)
 {
 	if (dc->ctx->dce_version >= DCN_VERSION_1_0) {
 		memset(&pipe_ctx->visual_confirm_color, 0, sizeof(struct tg_color));
@@ -1105,9 +1108,9 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte
 			if (dc->debug.visual_confirm == VISUAL_CONFIRM_MPCTREE)
 				get_mpctree_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
 			else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SUBVP)
-				get_subvp_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color));
+				get_subvp_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
 			else if (dc->debug.visual_confirm == VISUAL_CONFIRM_MCLK_SWITCH)
-				get_mclk_switch_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color));
+				get_mclk_switch_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
 		}
 	}
 }
@@ -1115,7 +1118,7 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte
 static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 {
 	int i, j;
-	struct dc_state *dangling_context = dc_create_state(dc);
+	struct dc_state *dangling_context = dc_state_create_current_copy(dc);
 	struct dc_state *current_ctx;
 	struct pipe_ctx *pipe;
 	struct timing_generator *tg;
@@ -1123,8 +1126,6 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 	if (dangling_context == NULL)
 		return;
 
-	dc_resource_state_copy_construct(dc->current_state, dangling_context);
-
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct dc_stream_state *old_stream =
 				dc->current_state->res_ctx.pipe_ctx[i].stream;
@@ -1161,6 +1162,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 		}
 
 		if (should_disable && old_stream) {
+			bool is_phantom = dc_state_get_stream_subvp_type(dc->current_state, old_stream) == SUBVP_PHANTOM;
 			pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 			tg = pipe->stream_res.tg;
 			/* When disabling plane for a phantom pipe, we must turn on the
@@ -1169,22 +1171,29 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 			 * state that can result in underflow or hang when enabling it
 			 * again for different use.
 			 */
-			if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) {
+			if (is_phantom) {
 				if (tg->funcs->enable_crtc) {
 					int main_pipe_width, main_pipe_height;
+					struct dc_stream_state *old_paired_stream = dc_state_get_paired_subvp_stream(dc->current_state, old_stream);
 
-					main_pipe_width = old_stream->mall_stream_config.paired_stream->dst.width;
-					main_pipe_height = old_stream->mall_stream_config.paired_stream->dst.height;
+					main_pipe_width = old_paired_stream->dst.width;
+					main_pipe_height = old_paired_stream->dst.height;
 					if (dc->hwss.blank_phantom)
 						dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height);
 					tg->funcs->enable_crtc(tg);
 				}
 			}
-			dc_rem_all_planes_for_stream(dc, old_stream, dangling_context);
+
+			if (is_phantom)
+				dc_state_rem_all_phantom_planes_for_stream(dc, old_stream, dangling_context, true);
+			else
+				dc_state_rem_all_planes_for_stream(dc, old_stream, dangling_context);
 			disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context);
 
-			if (pipe->stream && pipe->plane_state)
-				dc_update_viusal_confirm_color(dc, context, pipe);
+			if (pipe->stream && pipe->plane_state) {
+				set_p_state_switch_method(dc, context, pipe);
+				dc_update_visual_confirm_color(dc, context, pipe);
+			}
 
 			if (dc->hwss.apply_ctx_for_surface) {
 				apply_ctx_interdependent_lock(dc, dc->current_state, old_stream, true);
@@ -1203,7 +1212,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 			 * The OTG is set to disable on falling edge of VUPDATE so the plane disable
 			 * will still get it's double buffer update.
 			 */
-			if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) {
+			if (is_phantom) {
 				if (tg->funcs->disable_phantom_crtc)
 					tg->funcs->disable_phantom_crtc(tg);
 			}
@@ -1212,7 +1221,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 
 	current_ctx = dc->current_state;
 	dc->current_state = dangling_context;
-	dc_release_state(current_ctx);
+	dc_state_release(current_ctx);
 }
 
 static void disable_vbios_mode_if_required(
@@ -1284,7 +1293,7 @@ static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context)
 		int count = 0;
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (!pipe->plane_state || pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM)
 			continue;
 
 		/* Timeout 100 ms */
@@ -1510,7 +1519,7 @@ static void program_timing_sync(
 		}
 
 		for (k = 0; k < group_size; k++) {
-			struct dc_stream_status *status = dc_stream_get_status_from_state(ctx, pipe_set[k]->stream);
+			struct dc_stream_status *status = dc_state_get_stream_status(ctx, pipe_set[k]->stream);
 
 			status->timing_sync_info.group_id = num_group;
 			status->timing_sync_info.group_size = group_size;
@@ -1521,7 +1530,7 @@ static void program_timing_sync(
 
 		}
 
-		/* remove any other pipes that are already been synced */
+		/* remove any other unblanked pipes as they have already been synced */
 		if (dc->config.use_pipe_ctx_sync_logic) {
 			/* check pipe's syncd to decide which pipe to be removed */
 			for (j = 1; j < group_size; j++) {
@@ -1534,6 +1543,7 @@ static void program_timing_sync(
 					pipe_set[j]->pipe_idx_syncd = pipe_set[0]->pipe_idx_syncd;
 			}
 		} else {
+			/* remove any other pipes by checking valid plane */
 			for (j = j + 1; j < group_size; j++) {
 				bool is_blanked;
 
@@ -1554,7 +1564,7 @@ static void program_timing_sync(
 		if (group_size > 1) {
 			if (sync_type == TIMING_SYNCHRONIZABLE) {
 				dc->hwss.enable_timing_synchronization(
-					dc, group_index, group_size, pipe_set);
+					dc, ctx, group_index, group_size, pipe_set);
 			} else
 				if (sync_type == VBLANK_SYNCHRONIZABLE) {
 				dc->hwss.enable_vblanks_synchronization(
@@ -1836,7 +1846,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 		struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
 		/* Check old context for SubVP */
-		subvp_prev_use |= (old_pipe->stream && old_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM);
+		subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM);
 		if (subvp_prev_use)
 			break;
 	}
@@ -1964,6 +1974,10 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 		wait_for_no_pipes_pending(dc, context);
 		/* pplib is notified if disp_num changed */
 		dc->hwss.optimize_bandwidth(dc, context);
+		/* Need to do otg sync again as otg could be out of sync due to otg
+		 * workaround applied during clock update
+		 */
+		dc_trigger_sync(dc, context);
 	}
 
 	if (dc->hwss.update_dsc_pg)
@@ -1990,9 +2004,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 	old_state = dc->current_state;
 	dc->current_state = context;
 
-	dc_release_state(old_state);
+	dc_state_release(old_state);
 
-	dc_retain_state(dc->current_state);
+	dc_state_retain(dc->current_state);
 
 	return result;
 }
@@ -2063,12 +2077,10 @@ enum dc_status dc_commit_streams(struct dc *dc,
 	if (handle_exit_odm2to1)
 		res = commit_minimal_transition_state(dc, dc->current_state);
 
-	context = dc_create_state(dc);
+	context = dc_state_create_current_copy(dc);
 	if (!context)
 		goto context_alloc_fail;
 
-	dc_resource_state_copy_construct_current(dc, context);
-
 	res = dc_validate_with_context(dc, set, stream_count, context, false);
 	if (res != DC_OK) {
 		BREAK_TO_DEBUGGER();
@@ -2083,7 +2095,7 @@ enum dc_status dc_commit_streams(struct dc *dc,
 				streams[i]->out.otg_offset = context->stream_status[j].primary_otg_inst;
 
 			if (dc_is_embedded_signal(streams[i]->signal)) {
-				struct dc_stream_status *status = dc_stream_get_status_from_state(context, streams[i]);
+				struct dc_stream_status *status = dc_state_get_stream_status(context, streams[i]);
 
 				if (dc->hwss.is_abm_supported)
 					status->is_abm_supported = dc->hwss.is_abm_supported(dc, context, streams[i]);
@@ -2094,7 +2106,7 @@ enum dc_status dc_commit_streams(struct dc *dc,
 	}
 
 fail:
-	dc_release_state(context);
+	dc_state_release(context);
 
 context_alloc_fail:
 
@@ -2148,7 +2160,7 @@ static bool is_flip_pending_in_pipes(struct dc *dc, struct dc_state *context)
 		pipe = &context->res_ctx.pipe_ctx[i];
 
 		// Don't check flip pending on phantom pipes
-		if (!pipe->plane_state || (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM))
+		if (!pipe->plane_state || (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM))
 			continue;
 
 		/* Must set to false to start with, due to OR in update function */
@@ -2206,7 +2218,7 @@ void dc_post_update_surfaces_to_stream(struct dc *dc)
 			if (context->res_ctx.pipe_ctx[i].stream == NULL ||
 					context->res_ctx.pipe_ctx[i].plane_state == NULL) {
 				context->res_ctx.pipe_ctx[i].pipe_idx = i;
-				dc->hwss.disable_plane(dc, &context->res_ctx.pipe_ctx[i]);
+				dc->hwss.disable_plane(dc, context, &context->res_ctx.pipe_ctx[i]);
 			}
 
 		process_deferred_updates(dc);
@@ -2218,111 +2230,6 @@ void dc_post_update_surfaces_to_stream(struct dc *dc)
 	}
 
 	dc->optimized_required = false;
-	dc->wm_optimized_required = false;
-}
-
-static void init_state(struct dc *dc, struct dc_state *context)
-{
-	/* Each context must have their own instance of VBA and in order to
-	 * initialize and obtain IP and SOC the base DML instance from DC is
-	 * initially copied into every context
-	 */
-	memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
-}
-
-struct dc_state *dc_create_state(struct dc *dc)
-{
-	struct dc_state *context = kvzalloc(sizeof(struct dc_state),
-					    GFP_KERNEL);
-
-	if (!context)
-		return NULL;
-
-	init_state(dc, context);
-
-#ifdef CONFIG_DRM_AMD_DC_FP
-	if (dc->debug.using_dml2) {
-		dml2_create(dc, &dc->dml2_options, &context->bw_ctx.dml2);
-	}
-#endif
-	kref_init(&context->refcount);
-
-	return context;
-}
-
-struct dc_state *dc_copy_state(struct dc_state *src_ctx)
-{
-	int i, j;
-	struct dc_state *new_ctx = kvmalloc(sizeof(struct dc_state), GFP_KERNEL);
-#ifdef CONFIG_DRM_AMD_DC_FP
-	struct dml2_context *dml2 =  NULL;
-#endif
-
-	if (!new_ctx)
-		return NULL;
-	memcpy(new_ctx, src_ctx, sizeof(struct dc_state));
-
-#ifdef CONFIG_DRM_AMD_DC_FP
-	if (new_ctx->bw_ctx.dml2) {
-		dml2 = kzalloc(sizeof(struct dml2_context), GFP_KERNEL);
-		if (!dml2)
-			return NULL;
-
-		memcpy(dml2, src_ctx->bw_ctx.dml2, sizeof(struct dml2_context));
-		new_ctx->bw_ctx.dml2 = dml2;
-	}
-#endif
-
-	for (i = 0; i < MAX_PIPES; i++) {
-			struct pipe_ctx *cur_pipe = &new_ctx->res_ctx.pipe_ctx[i];
-
-			if (cur_pipe->top_pipe)
-				cur_pipe->top_pipe =  &new_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
-
-			if (cur_pipe->bottom_pipe)
-				cur_pipe->bottom_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
-
-			if (cur_pipe->prev_odm_pipe)
-				cur_pipe->prev_odm_pipe =  &new_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
-
-			if (cur_pipe->next_odm_pipe)
-				cur_pipe->next_odm_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
-
-	}
-
-	for (i = 0; i < new_ctx->stream_count; i++) {
-			dc_stream_retain(new_ctx->streams[i]);
-			for (j = 0; j < new_ctx->stream_status[i].plane_count; j++)
-				dc_plane_state_retain(
-					new_ctx->stream_status[i].plane_states[j]);
-	}
-
-	kref_init(&new_ctx->refcount);
-
-	return new_ctx;
-}
-
-void dc_retain_state(struct dc_state *context)
-{
-	kref_get(&context->refcount);
-}
-
-static void dc_state_free(struct kref *kref)
-{
-	struct dc_state *context = container_of(kref, struct dc_state, refcount);
-	dc_resource_state_destruct(context);
-
-#ifdef CONFIG_DRM_AMD_DC_FP
-	dml2_destroy(context->bw_ctx.dml2);
-	context->bw_ctx.dml2 = 0;
-#endif
-
-	kvfree(context);
-}
-
-void dc_release_state(struct dc_state *context)
-{
-	kref_put(&context->refcount, dc_state_free);
 }
 
 bool dc_set_generic_gpio_for_stereo(bool enable,
@@ -2745,8 +2652,6 @@ enum surface_update_type dc_check_update_surfaces_for_stream(
 		} else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) {
 			dc->optimized_required = true;
 		}
-
-		dc->optimized_required |= dc->wm_optimized_required;
 	}
 
 	return type;
@@ -2954,9 +2859,6 @@ static void copy_stream_update_to_stream(struct dc *dc,
 	if (update->vrr_active_fixed)
 		stream->vrr_active_fixed = *update->vrr_active_fixed;
 
-	if (update->crtc_timing_adjust)
-		stream->adjust = *update->crtc_timing_adjust;
-
 	if (update->dpms_off)
 		stream->dpms_off = *update->dpms_off;
 
@@ -2997,11 +2899,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
 				       update->dsc_config->num_slices_v != 0);
 
 		/* Use temporarry context for validating new DSC config */
-		struct dc_state *dsc_validate_context = dc_create_state(dc);
+		struct dc_state *dsc_validate_context = dc_state_create_copy(dc->current_state);
 
 		if (dsc_validate_context) {
-			dc_resource_state_copy_construct(dc->current_state, dsc_validate_context);
-
 			stream->timing.dsc_cfg = *update->dsc_config;
 			stream->timing.flags.DSC = enable_dsc;
 			if (!dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, true)) {
@@ -3010,7 +2910,7 @@ static void copy_stream_update_to_stream(struct dc *dc,
 				update->dsc_config = NULL;
 			}
 
-			dc_release_state(dsc_validate_context);
+			dc_state_release(dsc_validate_context);
 		} else {
 			DC_ERROR("Failed to allocate new validate context for DSC change\n");
 			update->dsc_config = NULL;
@@ -3109,30 +3009,27 @@ static bool update_planes_and_stream_state(struct dc *dc,
 			new_planes[i] = srf_updates[i].surface;
 
 		/* initialize scratch memory for building context */
-		context = dc_create_state(dc);
+		context = dc_state_create_copy(dc->current_state);
 		if (context == NULL) {
 			DC_ERROR("Failed to allocate new validate context!\n");
 			return false;
 		}
 
-		dc_resource_state_copy_construct(
-				dc->current_state, context);
-
 		/* For each full update, remove all existing phantom pipes first.
 		 * Ensures that we have enough pipes for newly added MPO planes
 		 */
-		if (dc->res_pool->funcs->remove_phantom_pipes)
-			dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
+		dc_state_remove_phantom_streams_and_planes(dc, context);
+		dc_state_release_phantom_streams_and_planes(dc, context);
 
 		/*remove old surfaces from context */
-		if (!dc_rem_all_planes_for_stream(dc, stream, context)) {
+		if (!dc_state_rem_all_planes_for_stream(dc, stream, context)) {
 
 			BREAK_TO_DEBUGGER();
 			goto fail;
 		}
 
 		/* add surface to context */
-		if (!dc_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) {
+		if (!dc_state_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) {
 
 			BREAK_TO_DEBUGGER();
 			goto fail;
@@ -3157,19 +3054,6 @@ static bool update_planes_and_stream_state(struct dc *dc,
 
 	if (update_type == UPDATE_TYPE_FULL) {
 		if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
-			/* For phantom pipes we remove and create a new set of phantom pipes
-			 * for each full update (because we don't know if we'll need phantom
-			 * pipes until after the first round of validation). However, if validation
-			 * fails we need to keep the existing phantom pipes (because we don't update
-			 * the dc->current_state).
-			 *
-			 * The phantom stream/plane refcount is decremented for validation because
-			 * we assume it'll be removed (the free comes when the dc_state is freed),
-			 * but if validation fails we have to increment back the refcount so it's
-			 * consistent.
-			 */
-			if (dc->res_pool->funcs->retain_phantom_pipes)
-				dc->res_pool->funcs->retain_phantom_pipes(dc, dc->current_state);
 			BREAK_TO_DEBUGGER();
 			goto fail;
 		}
@@ -3190,7 +3074,7 @@ static bool update_planes_and_stream_state(struct dc *dc,
 	return true;
 
 fail:
-	dc_release_state(context);
+	dc_state_release(context);
 
 	return false;
 
@@ -3386,7 +3270,7 @@ void dc_dmub_update_dirty_rect(struct dc *dc,
 
 			update_dirty_rect->panel_inst = panel_inst;
 			update_dirty_rect->pipe_idx = j;
-			dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+			dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 		}
 	}
 }
@@ -3488,18 +3372,24 @@ static void commit_planes_for_stream_fast(struct dc *dc,
 {
 	int i, j;
 	struct pipe_ctx *top_pipe_to_program = NULL;
+	struct dc_stream_status *stream_status = NULL;
 	dc_z10_restore(dc);
 
 	top_pipe_to_program = resource_get_otg_master_for_stream(
 			&context->res_ctx,
 			stream);
 
-	if (dc->debug.visual_confirm) {
-		for (i = 0; i < dc->res_pool->pipe_count; i++) {
-			struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+	if (!top_pipe_to_program)
+		return;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-			if (pipe->stream && pipe->plane_state)
-				dc_update_viusal_confirm_color(dc, context, pipe);
+		if (pipe->stream && pipe->plane_state) {
+			set_p_state_switch_method(dc, context, pipe);
+
+			if (dc->debug.visual_confirm)
+				dc_update_visual_confirm_color(dc, context, pipe);
 		}
 	}
 
@@ -3523,6 +3413,8 @@ static void commit_planes_for_stream_fast(struct dc *dc,
 		}
 	}
 
+	stream_status = dc_state_get_stream_status(context, stream);
+
 	build_dmub_cmd_list(dc,
 			srf_updates,
 			surface_count,
@@ -3535,7 +3427,8 @@ static void commit_planes_for_stream_fast(struct dc *dc,
 			context->dmub_cmd_count,
 			context->block_sequence,
 			&(context->block_sequence_steps),
-			top_pipe_to_program);
+			top_pipe_to_program,
+			stream_status);
 	hwss_execute_sequence(dc,
 			context->block_sequence,
 			context->block_sequence_steps);
@@ -3631,7 +3524,7 @@ static void commit_planes_for_stream(struct dc *dc,
 		struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
 		// Check old context for SubVP
-		subvp_prev_use |= (old_pipe->stream && old_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM);
+		subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM);
 		if (subvp_prev_use)
 			break;
 	}
@@ -3639,19 +3532,22 @@ static void commit_planes_for_stream(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			subvp_curr_use = true;
 			break;
 		}
 	}
 
-	if (dc->debug.visual_confirm)
-		for (i = 0; i < dc->res_pool->pipe_count; i++) {
-			struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe->stream && pipe->plane_state) {
+			set_p_state_switch_method(dc, context, pipe);
 
-			if (pipe->stream && pipe->plane_state)
-				dc_update_viusal_confirm_color(dc, context, pipe);
+			if (dc->debug.visual_confirm)
+				dc_update_visual_confirm_color(dc, context, pipe);
 		}
+	}
 
 	if (stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE) {
 		struct pipe_ctx *mpcc_pipe;
@@ -4024,7 +3920,7 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+		if (dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_NONE) {
 			subvp_active = true;
 			break;
 		}
@@ -4061,7 +3957,7 @@ struct pipe_split_policy_backup {
 static void release_minimal_transition_state(struct dc *dc,
 		struct dc_state *context, struct pipe_split_policy_backup *policy)
 {
-	dc_release_state(context);
+	dc_state_release(context);
 	/* restore previous pipe split and odm policy */
 	if (!dc->config.is_vmin_only_asic)
 		dc->debug.pipe_split_policy = policy->mpc_policy;
@@ -4072,7 +3968,7 @@ static void release_minimal_transition_state(struct dc *dc,
 static struct dc_state *create_minimal_transition_state(struct dc *dc,
 		struct dc_state *base_context, struct pipe_split_policy_backup *policy)
 {
-	struct dc_state *minimal_transition_context = dc_create_state(dc);
+	struct dc_state *minimal_transition_context = NULL;
 	unsigned int i, j;
 
 	if (!dc->config.is_vmin_only_asic) {
@@ -4084,7 +3980,9 @@ static struct dc_state *create_minimal_transition_state(struct dc *dc,
 	policy->subvp_policy = dc->debug.force_disable_subvp;
 	dc->debug.force_disable_subvp = true;
 
-	dc_resource_state_copy_construct(base_context, minimal_transition_context);
+	minimal_transition_context = dc_state_create_copy(base_context);
+	if (!minimal_transition_context)
+		return NULL;
 
 	/* commit minimal state */
 	if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context, false)) {
@@ -4116,7 +4014,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc,
 	bool success = false;
 	struct dc_state *minimal_transition_context;
 	struct pipe_split_policy_backup policy;
-	struct mall_temp_config mall_temp_config;
 
 	/* commit based on new context */
 	/* Since all phantom pipes are removed in full validation,
@@ -4125,8 +4022,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc,
 	 * pipe as subvp/phantom will be cleared (dc copy constructor
 	 * creates a shallow copy).
 	 */
-	if (dc->res_pool->funcs->save_mall_state)
-		dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config);
 	minimal_transition_context = create_minimal_transition_state(dc,
 			context, &policy);
 	if (minimal_transition_context) {
@@ -4139,16 +4034,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc,
 			success = dc_commit_state_no_check(dc, minimal_transition_context) == DC_OK;
 		}
 		release_minimal_transition_state(dc, minimal_transition_context, &policy);
-		if (dc->res_pool->funcs->restore_mall_state)
-			dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config);
-		/* If we do a minimal transition with plane removal and the context
-		 * has subvp we also have to retain back the phantom stream / planes
-		 * since the refcount is decremented as part of the min transition
-		 * (we commit a state with no subvp, so the phantom streams / planes
-		 * had to be removed).
-		 */
-		if (dc->res_pool->funcs->retain_phantom_pipes)
-			dc->res_pool->funcs->retain_phantom_pipes(dc, context);
 	}
 
 	if (!success) {
@@ -4216,7 +4101,7 @@ static bool commit_minimal_transition_state(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) {
 			subvp_in_use = true;
 			break;
 		}
@@ -4403,8 +4288,7 @@ static bool full_update_required(struct dc *dc,
 			stream_update->mst_bw_update ||
 			stream_update->func_shaper ||
 			stream_update->lut3d_func ||
-			stream_update->pending_test_pattern ||
-			stream_update->crtc_timing_adjust))
+			stream_update->pending_test_pattern))
 		return true;
 
 	if (stream) {
@@ -4482,7 +4366,6 @@ bool dc_update_planes_and_stream(struct dc *dc,
 	struct dc_state *context;
 	enum surface_update_type update_type;
 	int i;
-	struct mall_temp_config mall_temp_config;
 	struct dc_fast_update fast_update[MAX_SURFACES] = {0};
 
 	/* In cases where MPO and split or ODM are used transitions can
@@ -4526,23 +4409,10 @@ bool dc_update_planes_and_stream(struct dc *dc,
 		 * pipe as subvp/phantom will be cleared (dc copy constructor
 		 * creates a shallow copy).
 		 */
-		if (dc->res_pool->funcs->save_mall_state)
-			dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config);
 		if (!commit_minimal_transition_state(dc, context)) {
-			dc_release_state(context);
+			dc_state_release(context);
 			return false;
 		}
-		if (dc->res_pool->funcs->restore_mall_state)
-			dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config);
-
-		/* If we do a minimal transition with plane removal and the context
-		 * has subvp we also have to retain back the phantom stream / planes
-		 * since the refcount is decremented as part of the min transition
-		 * (we commit a state with no subvp, so the phantom streams / planes
-		 * had to be removed).
-		 */
-		if (dc->res_pool->funcs->retain_phantom_pipes)
-			dc->res_pool->funcs->retain_phantom_pipes(dc, context);
 		update_type = UPDATE_TYPE_FULL;
 	}
 
@@ -4599,7 +4469,7 @@ bool dc_update_planes_and_stream(struct dc *dc,
 		struct dc_state *old = dc->current_state;
 
 		dc->current_state = context;
-		dc_release_state(old);
+		dc_state_release(old);
 
 		// clear any forced full updates
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -4658,14 +4528,12 @@ void dc_commit_updates_for_stream(struct dc *dc,
 	if (update_type >= UPDATE_TYPE_FULL) {
 
 		/* initialize scratch memory for building context */
-		context = dc_create_state(dc);
+		context = dc_state_create_copy(state);
 		if (context == NULL) {
 			DC_ERROR("Failed to allocate new validate context!\n");
 			return;
 		}
 
-		dc_resource_state_copy_construct(state, context);
-
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
 			struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -4704,7 +4572,7 @@ void dc_commit_updates_for_stream(struct dc *dc,
 	if (update_type >= UPDATE_TYPE_FULL) {
 		if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
 			DC_ERROR("Mode validation failed for stream update!\n");
-			dc_release_state(context);
+			dc_state_release(context);
 			return;
 		}
 	}
@@ -4737,7 +4605,7 @@ void dc_commit_updates_for_stream(struct dc *dc,
 		struct dc_state *old = dc->current_state;
 
 		dc->current_state = context;
-		dc_release_state(old);
+		dc_state_release(old);
 
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
@@ -4810,7 +4678,7 @@ void dc_set_power_state(
 
 	switch (power_state) {
 	case DC_ACPI_CM_POWER_STATE_D0:
-		dc_resource_state_construct(dc, dc->current_state);
+		dc_state_construct(dc, dc->current_state);
 
 		dc_z10_restore(dc);
 
@@ -4825,7 +4693,7 @@ void dc_set_power_state(
 	default:
 		ASSERT(dc->current_state->stream_count == 0);
 
-		dc_resource_state_destruct(dc->current_state);
+		dc_state_destruct(dc->current_state);
 
 		break;
 	}
@@ -4902,6 +4770,38 @@ bool dc_set_psr_allow_active(struct dc *dc, bool enable)
 	return true;
 }
 
+/* enable/disable eDP Replay without specify stream for eDP */
+bool dc_set_replay_allow_active(struct dc *dc, bool active)
+{
+	int i;
+	bool allow_active;
+
+	for (i = 0; i < dc->current_state->stream_count; i++) {
+		struct dc_link *link;
+		struct dc_stream_state *stream = dc->current_state->streams[i];
+
+		link = stream->link;
+		if (!link)
+			continue;
+
+		if (link->replay_settings.replay_feature_enabled) {
+			if (active && !link->replay_settings.replay_allow_active) {
+				allow_active = true;
+				if (!dc_link_set_replay_allow_active(link, &allow_active,
+					false, false, NULL))
+					return false;
+			} else if (!active && link->replay_settings.replay_allow_active) {
+				allow_active = false;
+				if (!dc_link_set_replay_allow_active(link, &allow_active,
+					true, false, NULL))
+					return false;
+			}
+		}
+	}
+
+	return true;
+}
+
 void dc_allow_idle_optimizations(struct dc *dc, bool allow)
 {
 	if (dc->debug.disable_idle_power_optimizations)
@@ -5095,18 +4995,28 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc)
  */
 bool dc_is_dmub_outbox_supported(struct dc *dc)
 {
-	/* DCN31 B0 USB4 DPIA needs dmub notifications for interrupts */
-	if (dc->ctx->asic_id.chip_family == FAMILY_YELLOW_CARP &&
-	    dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 &&
-	    !dc->debug.dpia_debug.bits.disable_dpia)
-		return true;
+	switch (dc->ctx->asic_id.chip_family) {
 
-	if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1 &&
-	    !dc->debug.dpia_debug.bits.disable_dpia)
-		return true;
+	case FAMILY_YELLOW_CARP:
+		/* DCN31 B0 USB4 DPIA needs dmub notifications for interrupts */
+		if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 &&
+		    !dc->debug.dpia_debug.bits.disable_dpia)
+			return true;
+	break;
+
+	case AMDGPU_FAMILY_GC_11_0_1:
+	case AMDGPU_FAMILY_GC_11_5_0:
+		if (!dc->debug.dpia_debug.bits.disable_dpia)
+			return true;
+	break;
+
+	default:
+		break;
+	}
 
 	/* dmub aux needs dmub notifications to be enabled */
 	return dc->debug.enable_dmub_aux_for_legacy_ddc;
+
 }
 
 /**
@@ -5203,7 +5113,7 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc,
 			);
 	}
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -5257,7 +5167,7 @@ bool dc_process_dmub_set_config_async(struct dc *dc,
 	cmd.set_config_access.set_config_control.cmd_pkt.msg_type = payload->msg_type;
 	cmd.set_config_access.set_config_control.cmd_pkt.msg_data = payload->msg_data;
 
-	if (!dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
+	if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
 		/* command is not processed by dmub */
 		notify->sc_status = SET_CONFIG_UNKNOWN_ERROR;
 		return is_cmd_complete;
@@ -5300,7 +5210,7 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
 	cmd.set_mst_alloc_slots.mst_slots_control.instance = dc->links[link_index]->ddc_hw_inst;
 	cmd.set_mst_alloc_slots.mst_slots_control.mst_alloc_slots = mst_alloc_slots;
 
-	if (!dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+	if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
 		/* command is not processed by dmub */
 		return DC_ERROR_UNEXPECTED;
 
@@ -5338,7 +5248,7 @@ void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc,
 	cmd.dpia_hpd_int_enable.header.type = DMUB_CMD__DPIA_HPD_INT_ENABLE;
 	cmd.dpia_hpd_int_enable.enable = hpd_int_enable;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	DC_LOG_DEBUG("%s: hpd_int_enable(%d)\n", __func__, hpd_int_enable);
 }
@@ -5437,6 +5347,8 @@ bool dc_abm_save_restore(
 	struct dc_link *link = stream->sink->link;
 	struct dc_link *edp_links[MAX_NUM_EDP];
 
+	if (link->replay_settings.replay_feature_enabled)
+		return false;
 
 	/*find primary pipe associated with stream*/
 	for (i = 0; i < MAX_PIPES; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index fe07160932d6..9c05b1a07142 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -31,6 +31,7 @@
 #include "basics/dc_common.h"
 #include "resource.h"
 #include "dc_dmub_srv.h"
+#include "dc_state_priv.h"
 
 #define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
 
@@ -425,45 +426,130 @@ void get_hdr_visual_confirm_color(
 }
 
 void get_subvp_visual_confirm_color(
-		struct dc *dc,
-		struct dc_state *context,
 		struct pipe_ctx *pipe_ctx,
 		struct tg_color *color)
 {
 	uint32_t color_value = MAX_TG_COLOR_VALUE;
-	bool enable_subvp = false;
-	int i;
-
-	if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !context)
-		return;
+	if (pipe_ctx) {
+		switch (pipe_ctx->p_state_type) {
+		case P_STATE_SUB_VP:
+			color->color_r_cr = color_value;
+			color->color_g_y  = 0;
+			color->color_b_cb = 0;
+			break;
+		case P_STATE_DRR_SUB_VP:
+			color->color_r_cr = 0;
+			color->color_g_y  = color_value;
+			color->color_b_cb = 0;
+			break;
+		case P_STATE_V_BLANK_SUB_VP:
+			color->color_r_cr = 0;
+			color->color_g_y  = 0;
+			color->color_b_cb = color_value;
+			break;
+		default:
+			break;
+		}
+	}
+}
 
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+void get_mclk_switch_visual_confirm_color(
+		struct pipe_ctx *pipe_ctx,
+		struct tg_color *color)
+{
+	uint32_t color_value = MAX_TG_COLOR_VALUE;
 
-		if (pipe->stream && pipe->stream->mall_stream_config.paired_stream &&
-		    pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
-			/* SubVP enable - red */
-			color->color_g_y = 0;
+	if (pipe_ctx) {
+		switch (pipe_ctx->p_state_type) {
+		case P_STATE_V_BLANK:
+			color->color_r_cr = color_value;
+			color->color_g_y = color_value;
 			color->color_b_cb = 0;
+			break;
+		case P_STATE_FPO:
+			color->color_r_cr = 0;
+			color->color_g_y  = color_value;
+			color->color_b_cb = color_value;
+			break;
+		case P_STATE_V_ACTIVE:
 			color->color_r_cr = color_value;
-			enable_subvp = true;
-
-			if (pipe_ctx->stream == pipe->stream)
-				return;
+			color->color_g_y  = 0;
+			color->color_b_cb = color_value;
+			break;
+		case P_STATE_SUB_VP:
+			color->color_r_cr = color_value;
+			color->color_g_y  = 0;
+			color->color_b_cb = 0;
+			break;
+		case P_STATE_DRR_SUB_VP:
+			color->color_r_cr = 0;
+			color->color_g_y  = color_value;
+			color->color_b_cb = 0;
+			break;
+		case P_STATE_V_BLANK_SUB_VP:
+			color->color_r_cr = 0;
+			color->color_g_y  = 0;
+			color->color_b_cb = color_value;
+			break;
+		default:
 			break;
 		}
 	}
+}
 
-	if (enable_subvp && pipe_ctx->stream->mall_stream_config.type == SUBVP_NONE) {
-		color->color_r_cr = 0;
-		if (pipe_ctx->stream->allow_freesync == 1) {
-			/* SubVP enable and DRR on - green */
-			color->color_b_cb = 0;
-			color->color_g_y = color_value;
+void set_p_state_switch_method(
+		struct dc *dc,
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx)
+{
+	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+	bool enable_subvp;
+
+	if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !vba || !context)
+		return;
+
+	if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] !=
+			dm_dram_clock_change_unsupported) {
+		/* MCLK switching is supported */
+		if (!pipe_ctx->has_vactive_margin) {
+			/* In Vblank - yellow */
+			pipe_ctx->p_state_type = P_STATE_V_BLANK;
+
+			if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+				/* FPO + Vblank - cyan */
+				pipe_ctx->p_state_type = P_STATE_FPO;
+			}
 		} else {
-			/* SubVP enable and No DRR - blue */
-			color->color_g_y = 0;
-			color->color_b_cb = color_value;
+			/* In Vactive - pink */
+			pipe_ctx->p_state_type = P_STATE_V_ACTIVE;
+		}
+
+		/* SubVP */
+		enable_subvp = false;
+
+		for (int i = 0; i < dc->res_pool->pipe_count; i++) {
+			struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+			if (pipe->stream && dc_state_get_paired_subvp_stream(context, pipe->stream) &&
+					dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+				/* SubVP enable - red */
+				pipe_ctx->p_state_type = P_STATE_SUB_VP;
+				enable_subvp = true;
+
+				if (pipe_ctx->stream == pipe->stream)
+					return;
+				break;
+			}
+		}
+
+		if (enable_subvp && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_NONE) {
+			if (pipe_ctx->stream->allow_freesync == 1) {
+				/* SubVP enable and DRR on - green */
+				pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP;
+			} else {
+				/* SubVP enable and No DRR - blue */
+				pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP;
+			}
 		}
 	}
 }
@@ -473,7 +559,8 @@ void hwss_build_fast_sequence(struct dc *dc,
 		unsigned int dmub_cmd_count,
 		struct block_sequence block_sequence[],
 		int *num_steps,
-		struct pipe_ctx *pipe_ctx)
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_status *stream_status)
 {
 	struct dc_plane_state *plane = pipe_ctx->plane_state;
 	struct dc_stream_state *stream = pipe_ctx->stream;
@@ -490,7 +577,8 @@ void hwss_build_fast_sequence(struct dc *dc,
 	if (dc->hwss.subvp_pipe_control_lock_fast) {
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc;
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = true;
-		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.pipe_ctx = pipe_ctx;
+		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip =
+				plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN;
 		block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST;
 		(*num_steps)++;
 	}
@@ -529,7 +617,7 @@ void hwss_build_fast_sequence(struct dc *dc,
 			}
 			if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) {
 				if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) &&
-						current_mpc_pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+						stream_status->mall_stream_config.type == SUBVP_MAIN) {
 					block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv;
 					block_sequence[*num_steps].params.subvp_save_surf_addr.addr = &current_mpc_pipe->plane_state->address;
 					block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index;
@@ -612,7 +700,8 @@ void hwss_build_fast_sequence(struct dc *dc,
 	if (dc->hwss.subvp_pipe_control_lock_fast) {
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc;
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = false;
-		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.pipe_ctx = pipe_ctx;
+		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip =
+				plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN;
 		block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST;
 		(*num_steps)++;
 	}
@@ -724,7 +813,7 @@ void hwss_send_dmcub_cmd(union block_sequence_params *params)
 	union dmub_rb_cmd *cmd = params->send_dmcub_cmd_params.cmd;
 	enum dm_dmub_wait_type wait_type = params->send_dmcub_cmd_params.wait_type;
 
-	dm_execute_dmub_cmd(ctx, cmd, wait_type);
+	dc_wake_and_execute_dmub_cmd(ctx, cmd, wait_type);
 }
 
 void hwss_program_manual_trigger(union block_sequence_params *params)
@@ -812,42 +901,6 @@ void hwss_subvp_save_surf_addr(union block_sequence_params *params)
 	dc_dmub_srv_subvp_save_surf_addr(dc_dmub_srv, addr, subvp_index);
 }
 
-void get_mclk_switch_visual_confirm_color(
-		struct dc *dc,
-		struct dc_state *context,
-		struct pipe_ctx *pipe_ctx,
-		struct tg_color *color)
-{
-	uint32_t color_value = MAX_TG_COLOR_VALUE;
-	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
-
-	if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !vba || !context)
-		return;
-
-	if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] !=
-			dm_dram_clock_change_unsupported) {
-		/* MCLK switching is supported */
-		if (!pipe_ctx->has_vactive_margin) {
-			/* In Vblank - yellow */
-			color->color_r_cr = color_value;
-			color->color_g_y = color_value;
-
-			if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
-				/* FPO + Vblank - cyan */
-				color->color_r_cr = 0;
-				color->color_g_y  = color_value;
-				color->color_b_cb = color_value;
-			}
-		} else {
-			/* In Vactive - pink */
-			color->color_r_cr = color_value;
-			color->color_b_cb = color_value;
-		}
-		/* SubVP */
-		get_subvp_visual_confirm_color(dc, context, pipe_ctx, color);
-	}
-}
-
 void get_surface_tile_visual_confirm_color(
 		struct pipe_ctx *pipe_ctx,
 		struct tg_color *color)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
index ed94187c2afa..c6c35037bdb8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
@@ -467,6 +467,13 @@ bool dc_link_setup_psr(struct dc_link *link,
 	return link->dc->link_srv->edp_setup_psr(link, stream, psr_config, psr_context);
 }
 
+bool dc_link_set_replay_allow_active(struct dc_link *link, const bool *allow_active,
+		bool wait, bool force_static, const unsigned int *power_opts)
+{
+	return link->dc->link_srv->edp_set_replay_allow_active(link, allow_active, wait,
+			force_static, power_opts);
+}
+
 bool dc_link_get_replay_state(const struct dc_link *link, uint64_t *state)
 {
 	return link->dc->link_srv->edp_get_replay_state(link, state);
@@ -497,7 +504,7 @@ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable)
 	link->dc->link_srv->enable_hpd_filter(link, enable);
 }
 
-bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count)
+bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count)
 {
 	return dc->link_srv->validate_dpia_bandwidth(streams, count);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index a1f1d1003992..57f0ddd15923 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -42,6 +42,7 @@
 #include "link_enc_cfg.h"
 #include "link.h"
 #include "clk_mgr.h"
+#include "dc_state_priv.h"
 #include "virtual/virtual_link_hwss.h"
 #include "link/hwss/link_hwss_dio.h"
 #include "link/hwss/link_hwss_dpia.h"
@@ -69,8 +70,8 @@
 #include "dcn314/dcn314_resource.h"
 #include "dcn315/dcn315_resource.h"
 #include "dcn316/dcn316_resource.h"
-#include "../dcn32/dcn32_resource.h"
-#include "../dcn321/dcn321_resource.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn321/dcn321_resource.h"
 #include "dcn35/dcn35_resource.h"
 
 #define VISUAL_CONFIRM_BASE_DEFAULT 3
@@ -1764,6 +1765,29 @@ int recource_find_free_pipe_not_used_in_cur_res_ctx(
 	return free_pipe_idx;
 }
 
+int recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
+		const struct resource_context *cur_res_ctx,
+		struct resource_context *new_res_ctx,
+		const struct resource_pool *pool)
+{
+	int free_pipe_idx = FREE_PIPE_INDEX_NOT_FOUND;
+	const struct pipe_ctx *new_pipe, *cur_pipe;
+	int i;
+
+	for (i = 0; i < pool->pipe_count; i++) {
+		cur_pipe = &cur_res_ctx->pipe_ctx[i];
+		new_pipe = &new_res_ctx->pipe_ctx[i];
+
+		if (resource_is_pipe_type(cur_pipe, OTG_MASTER) &&
+				resource_is_pipe_type(new_pipe, FREE_PIPE)) {
+			free_pipe_idx = i;
+			break;
+		}
+	}
+
+	return free_pipe_idx;
+}
+
 int resource_find_free_pipe_used_as_cur_sec_dpp_in_mpcc_combine(
 		const struct resource_context *cur_res_ctx,
 		struct resource_context *new_res_ctx,
@@ -2233,7 +2257,7 @@ static struct pipe_ctx *get_last_dpp_pipe_in_mpcc_combine(
 }
 
 static bool update_pipe_params_after_odm_slice_count_change(
-		const struct dc_stream_state *stream,
+		struct pipe_ctx *otg_master,
 		struct dc_state *context,
 		const struct resource_pool *pool)
 {
@@ -2243,9 +2267,12 @@ static bool update_pipe_params_after_odm_slice_count_change(
 
 	for (i = 0; i < pool->pipe_count && result; i++) {
 		pipe = &context->res_ctx.pipe_ctx[i];
-		if (pipe->stream == stream && pipe->plane_state)
+		if (pipe->stream == otg_master->stream && pipe->plane_state)
 			result = resource_build_scaling_params(pipe);
 	}
+
+	if (pool->funcs->build_pipe_pix_clk_params)
+		pool->funcs->build_pipe_pix_clk_params(otg_master);
 	return result;
 }
 
@@ -2433,6 +2460,9 @@ void resource_remove_otg_master_for_stream_output(struct dc_state *context,
 	struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(
 			&context->res_ctx, stream);
 
+	if (!otg_master)
+		return;
+
 	ASSERT(resource_get_odm_slice_count(otg_master) == 1);
 	ASSERT(otg_master->plane_state == NULL);
 	ASSERT(otg_master->stream_res.stream_enc);
@@ -2928,7 +2958,7 @@ bool resource_update_pipes_for_stream_with_slice_count(
 					otg_master, new_ctx, pool);
 	if (result)
 		result = update_pipe_params_after_odm_slice_count_change(
-				otg_master->stream, new_ctx, pool);
+				otg_master, new_ctx, pool);
 	return result;
 }
 
@@ -2967,189 +2997,6 @@ bool resource_update_pipes_for_plane_with_slice_count(
 	return result;
 }
 
-bool dc_add_plane_to_context(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state *plane_state,
-		struct dc_state *context)
-{
-	struct resource_pool *pool = dc->res_pool;
-	struct pipe_ctx *otg_master_pipe;
-	struct dc_stream_status *stream_status = NULL;
-	bool added = false;
-
-	stream_status = dc_stream_get_status_from_state(context, stream);
-	if (stream_status == NULL) {
-		dm_error("Existing stream not found; failed to attach surface!\n");
-		goto out;
-	} else if (stream_status->plane_count == MAX_SURFACE_NUM) {
-		dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n",
-				plane_state, MAX_SURFACE_NUM);
-		goto out;
-	}
-
-	otg_master_pipe = resource_get_otg_master_for_stream(
-			&context->res_ctx, stream);
-	added = resource_append_dpp_pipes_for_plane_composition(context,
-			dc->current_state, pool, otg_master_pipe, plane_state);
-
-	if (added) {
-		stream_status->plane_states[stream_status->plane_count] =
-				plane_state;
-		stream_status->plane_count++;
-		dc_plane_state_retain(plane_state);
-	}
-
-out:
-	return added;
-}
-
-bool dc_remove_plane_from_context(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state *plane_state,
-		struct dc_state *context)
-{
-	int i;
-	struct dc_stream_status *stream_status = NULL;
-	struct resource_pool *pool = dc->res_pool;
-
-	if (!plane_state)
-		return true;
-
-	for (i = 0; i < context->stream_count; i++)
-		if (context->streams[i] == stream) {
-			stream_status = &context->stream_status[i];
-			break;
-		}
-
-	if (stream_status == NULL) {
-		dm_error("Existing stream not found; failed to remove plane.\n");
-		return false;
-	}
-
-	resource_remove_dpp_pipes_for_plane_composition(
-			context, pool, plane_state);
-
-	for (i = 0; i < stream_status->plane_count; i++) {
-		if (stream_status->plane_states[i] == plane_state) {
-			dc_plane_state_release(stream_status->plane_states[i]);
-			break;
-		}
-	}
-
-	if (i == stream_status->plane_count) {
-		dm_error("Existing plane_state not found; failed to detach it!\n");
-		return false;
-	}
-
-	stream_status->plane_count--;
-
-	/* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */
-	for (; i < stream_status->plane_count; i++)
-		stream_status->plane_states[i] = stream_status->plane_states[i + 1];
-
-	stream_status->plane_states[stream_status->plane_count] = NULL;
-
-	if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm)
-		/* ODM combine could prevent us from supporting more planes
-		 * we will reset ODM slice count back to 1 when all planes have
-		 * been removed to maximize the amount of planes supported when
-		 * new planes are added.
-		 */
-		resource_update_pipes_for_stream_with_slice_count(
-				context, dc->current_state, dc->res_pool, stream, 1);
-
-	return true;
-}
-
-/**
- * dc_rem_all_planes_for_stream - Remove planes attached to the target stream.
- *
- * @dc: Current dc state.
- * @stream: Target stream, which we want to remove the attached plans.
- * @context: New context.
- *
- * Return:
- * Return true if DC was able to remove all planes from the target
- * stream, otherwise, return false.
- */
-bool dc_rem_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_state *context)
-{
-	int i, old_plane_count;
-	struct dc_stream_status *stream_status = NULL;
-	struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
-
-	for (i = 0; i < context->stream_count; i++)
-			if (context->streams[i] == stream) {
-				stream_status = &context->stream_status[i];
-				break;
-			}
-
-	if (stream_status == NULL) {
-		dm_error("Existing stream %p not found!\n", stream);
-		return false;
-	}
-
-	old_plane_count = stream_status->plane_count;
-
-	for (i = 0; i < old_plane_count; i++)
-		del_planes[i] = stream_status->plane_states[i];
-
-	for (i = 0; i < old_plane_count; i++)
-		if (!dc_remove_plane_from_context(dc, stream, del_planes[i], context))
-			return false;
-
-	return true;
-}
-
-static bool add_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		const struct dc_validation_set set[],
-		int set_count,
-		struct dc_state *context)
-{
-	int i, j;
-
-	for (i = 0; i < set_count; i++)
-		if (set[i].stream == stream)
-			break;
-
-	if (i == set_count) {
-		dm_error("Stream %p not found in set!\n", stream);
-		return false;
-	}
-
-	for (j = 0; j < set[i].plane_count; j++)
-		if (!dc_add_plane_to_context(dc, stream, set[i].plane_states[j], context))
-			return false;
-
-	return true;
-}
-
-bool dc_add_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state * const *plane_states,
-		int plane_count,
-		struct dc_state *context)
-{
-	struct dc_validation_set set;
-	int i;
-
-	set.stream = stream;
-	set.plane_count = plane_count;
-
-	for (i = 0; i < plane_count; i++)
-		set.plane_states[i] = plane_states[i];
-
-	return add_all_planes_for_stream(dc, stream, &set, 1, context);
-}
-
 bool dc_is_timing_changed(struct dc_stream_state *cur_stream,
 		       struct dc_stream_state *new_stream)
 {
@@ -3301,84 +3148,6 @@ static struct audio *find_first_free_audio(
 	return NULL;
 }
 
-/*
- * dc_add_stream_to_ctx() - Add a new dc_stream_state to a dc_state.
- */
-enum dc_status dc_add_stream_to_ctx(
-		struct dc *dc,
-		struct dc_state *new_ctx,
-		struct dc_stream_state *stream)
-{
-	enum dc_status res;
-	DC_LOGGER_INIT(dc->ctx->logger);
-
-	if (new_ctx->stream_count >= dc->res_pool->timing_generator_count) {
-		DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream);
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	new_ctx->streams[new_ctx->stream_count] = stream;
-	dc_stream_retain(stream);
-	new_ctx->stream_count++;
-
-	res = resource_add_otg_master_for_stream_output(
-			new_ctx, dc->res_pool, stream);
-	if (res != DC_OK)
-		DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res);
-
-	return res;
-}
-
-/*
- * dc_remove_stream_from_ctx() - Remove a stream from a dc_state.
- */
-enum dc_status dc_remove_stream_from_ctx(
-			struct dc *dc,
-			struct dc_state *new_ctx,
-			struct dc_stream_state *stream)
-{
-	int i;
-	struct dc_context *dc_ctx = dc->ctx;
-	struct pipe_ctx *del_pipe = resource_get_otg_master_for_stream(
-			&new_ctx->res_ctx, stream);
-
-	if (!del_pipe) {
-		DC_ERROR("Pipe not found for stream %p !\n", stream);
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	resource_update_pipes_for_stream_with_slice_count(new_ctx,
-			dc->current_state, dc->res_pool, stream, 1);
-	resource_remove_otg_master_for_stream_output(
-			new_ctx, dc->res_pool, stream);
-
-	for (i = 0; i < new_ctx->stream_count; i++)
-		if (new_ctx->streams[i] == stream)
-			break;
-
-	if (new_ctx->streams[i] != stream) {
-		DC_ERROR("Context doesn't have stream %p !\n", stream);
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	dc_stream_release(new_ctx->streams[i]);
-	new_ctx->stream_count--;
-
-	/* Trim back arrays */
-	for (; i < new_ctx->stream_count; i++) {
-		new_ctx->streams[i] = new_ctx->streams[i + 1];
-		new_ctx->stream_status[i] = new_ctx->stream_status[i + 1];
-	}
-
-	new_ctx->streams[new_ctx->stream_count] = NULL;
-	memset(
-			&new_ctx->stream_status[new_ctx->stream_count],
-			0,
-			sizeof(new_ctx->stream_status[0]));
-
-	return DC_OK;
-}
-
 static struct dc_stream_state *find_pll_sharable_stream(
 		struct dc_stream_state *stream_needs_pll,
 		struct dc_state *context)
@@ -3586,6 +3355,7 @@ static void mark_seamless_boot_stream(
  *       |________|_______________|___________|_____________|
  */
 static bool acquire_otg_master_pipe_for_stream(
+		const struct dc_state *cur_ctx,
 		struct dc_state *new_ctx,
 		const struct resource_pool *pool,
 		struct dc_stream_state *stream)
@@ -3599,7 +3369,22 @@ static bool acquire_otg_master_pipe_for_stream(
 	int pipe_idx;
 	struct pipe_ctx *pipe_ctx = NULL;
 
-	pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool);
+	/*
+	 * Upper level code is responsible to optimize unnecessary addition and
+	 * removal for unchanged streams. So unchanged stream will keep the same
+	 * OTG master instance allocated. When current stream is removed and a
+	 * new stream is added, we want to reuse the OTG instance made available
+	 * by the removed stream first. If not found, we try to avoid of using
+	 * any free pipes already used in current context as this could tear
+	 * down exiting ODM/MPC/MPO configuration unnecessarily.
+	 */
+	pipe_idx = recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
+			&cur_ctx->res_ctx, &new_ctx->res_ctx, pool);
+	if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+		pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx(
+				&cur_ctx->res_ctx, &new_ctx->res_ctx, pool);
+	if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+		pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool);
 	if (pipe_idx != FREE_PIPE_INDEX_NOT_FOUND) {
 		pipe_ctx = &new_ctx->res_ctx.pipe_ctx[pipe_idx];
 		memset(pipe_ctx, 0, sizeof(*pipe_ctx));
@@ -3659,7 +3444,7 @@ enum dc_status resource_map_pool_resources(
 
 	if (!acquired)
 		/* acquire new resources */
-		acquired = acquire_otg_master_pipe_for_stream(
+		acquired = acquire_otg_master_pipe_for_stream(dc->current_state,
 				context, pool, stream);
 
 	pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream);
@@ -3742,34 +3527,6 @@ enum dc_status resource_map_pool_resources(
 	return DC_ERROR_UNEXPECTED;
 }
 
-/**
- * dc_resource_state_copy_construct_current() - Creates a new dc_state from existing state
- *
- * @dc: copy out of dc->current_state
- * @dst_ctx: copy into this
- *
- * This function makes a shallow copy of the current DC state and increments
- * refcounts on existing streams and planes.
- */
-void dc_resource_state_copy_construct_current(
-		const struct dc *dc,
-		struct dc_state *dst_ctx)
-{
-	dc_resource_state_copy_construct(dc->current_state, dst_ctx);
-}
-
-
-void dc_resource_state_construct(
-		const struct dc *dc,
-		struct dc_state *dst_ctx)
-{
-	dst_ctx->clk_mgr = dc->clk_mgr;
-
-	/* Initialise DIG link encoder resource tracking variables. */
-	link_enc_cfg_init(dc, dst_ctx);
-}
-
-
 bool dc_resource_is_dsc_encoding_supported(const struct dc *dc)
 {
 	if (dc->res_pool == NULL)
@@ -3813,6 +3570,31 @@ static bool planes_changed_for_existing_stream(struct dc_state *context,
 	return false;
 }
 
+static bool add_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		const struct dc_validation_set set[],
+		int set_count,
+		struct dc_state *state)
+{
+	int i, j;
+
+	for (i = 0; i < set_count; i++)
+		if (set[i].stream == stream)
+			break;
+
+	if (i == set_count) {
+		dm_error("Stream %p not found in set!\n", stream);
+		return false;
+	}
+
+	for (j = 0; j < set[i].plane_count; j++)
+		if (!dc_state_add_plane(dc, stream, set[i].plane_states[j], state))
+			return false;
+
+	return true;
+}
+
 /**
  * dc_validate_with_context - Validate and update the potential new stream in the context object
  *
@@ -3918,7 +3700,8 @@ enum dc_status dc_validate_with_context(struct dc *dc,
 						       unchanged_streams[i],
 						       set,
 						       set_count)) {
-			if (!dc_rem_all_planes_for_stream(dc,
+
+			if (!dc_state_rem_all_planes_for_stream(dc,
 							  unchanged_streams[i],
 							  context)) {
 				res = DC_FAIL_DETACH_SURFACES;
@@ -3940,12 +3723,24 @@ enum dc_status dc_validate_with_context(struct dc *dc,
 			}
 		}
 
-		if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) {
-			res = DC_FAIL_DETACH_SURFACES;
-			goto fail;
+		if (dc_state_get_stream_subvp_type(context, del_streams[i]) == SUBVP_PHANTOM) {
+			/* remove phantoms specifically */
+			if (!dc_state_rem_all_phantom_planes_for_stream(dc, del_streams[i], context, true)) {
+				res = DC_FAIL_DETACH_SURFACES;
+				goto fail;
+			}
+
+			res = dc_state_remove_phantom_stream(dc, context, del_streams[i]);
+			dc_state_release_phantom_stream(dc, context, del_streams[i]);
+		} else {
+			if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) {
+				res = DC_FAIL_DETACH_SURFACES;
+				goto fail;
+			}
+
+			res = dc_state_remove_stream(dc, context, del_streams[i]);
 		}
 
-		res = dc_remove_stream_from_ctx(dc, context, del_streams[i]);
 		if (res != DC_OK)
 			goto fail;
 	}
@@ -3968,7 +3763,7 @@ enum dc_status dc_validate_with_context(struct dc *dc,
 	/* Add new streams and then add all planes for the new stream */
 	for (i = 0; i < add_streams_count; i++) {
 		calculate_phy_pix_clks(add_streams[i]);
-		res = dc_add_stream_to_ctx(dc, context, add_streams[i]);
+		res = dc_state_add_stream(dc, context, add_streams[i]);
 		if (res != DC_OK)
 			goto fail;
 
@@ -4474,84 +4269,6 @@ static void set_vtem_info_packet(
 	*info_packet = stream->vtem_infopacket;
 }
 
-void dc_resource_state_destruct(struct dc_state *context)
-{
-	int i, j;
-
-	for (i = 0; i < context->stream_count; i++) {
-		for (j = 0; j < context->stream_status[i].plane_count; j++)
-			dc_plane_state_release(
-				context->stream_status[i].plane_states[j]);
-
-		context->stream_status[i].plane_count = 0;
-		dc_stream_release(context->streams[i]);
-		context->streams[i] = NULL;
-	}
-	context->stream_count = 0;
-	context->stream_mask = 0;
-	memset(&context->res_ctx, 0, sizeof(context->res_ctx));
-	memset(&context->pp_display_cfg, 0, sizeof(context->pp_display_cfg));
-	memset(&context->dcn_bw_vars, 0, sizeof(context->dcn_bw_vars));
-	context->clk_mgr = NULL;
-	memset(&context->bw_ctx.bw, 0, sizeof(context->bw_ctx.bw));
-	memset(context->block_sequence, 0, sizeof(context->block_sequence));
-	context->block_sequence_steps = 0;
-	memset(context->dc_dmub_cmd, 0, sizeof(context->dc_dmub_cmd));
-	context->dmub_cmd_count = 0;
-	memset(&context->perf_params, 0, sizeof(context->perf_params));
-	memset(&context->scratch, 0, sizeof(context->scratch));
-}
-
-void dc_resource_state_copy_construct(
-		const struct dc_state *src_ctx,
-		struct dc_state *dst_ctx)
-{
-	int i, j;
-	struct kref refcount = dst_ctx->refcount;
-#ifdef CONFIG_DRM_AMD_DC_FP
-	struct dml2_context *dml2 = NULL;
-
-	// Need to preserve allocated dml2 context
-	if (src_ctx->clk_mgr->ctx->dc->debug.using_dml2)
-		dml2 = dst_ctx->bw_ctx.dml2;
-#endif
-
-	*dst_ctx = *src_ctx;
-
-#ifdef CONFIG_DRM_AMD_DC_FP
-	// Preserve allocated dml2 context
-	if (src_ctx->clk_mgr->ctx->dc->debug.using_dml2)
-		dst_ctx->bw_ctx.dml2 = dml2;
-#endif
-
-	for (i = 0; i < MAX_PIPES; i++) {
-		struct pipe_ctx *cur_pipe = &dst_ctx->res_ctx.pipe_ctx[i];
-
-		if (cur_pipe->top_pipe)
-			cur_pipe->top_pipe =  &dst_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
-
-		if (cur_pipe->bottom_pipe)
-			cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
-
-		if (cur_pipe->next_odm_pipe)
-			cur_pipe->next_odm_pipe =  &dst_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
-
-		if (cur_pipe->prev_odm_pipe)
-			cur_pipe->prev_odm_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
-	}
-
-	for (i = 0; i < dst_ctx->stream_count; i++) {
-		dc_stream_retain(dst_ctx->streams[i]);
-		for (j = 0; j < dst_ctx->stream_status[i].plane_count; j++)
-			dc_plane_state_retain(
-				dst_ctx->stream_status[i].plane_states[j]);
-	}
-
-	/* context refcount should not be overridden */
-	dst_ctx->refcount = refcount;
-
-}
-
 struct clock_source *dc_resource_find_first_free_pll(
 		struct resource_context *res_ctx,
 		const struct resource_pool *pool)
@@ -4731,7 +4448,7 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
 			option = DITHER_OPTION_SPATIAL8;
 			break;
 		case COLOR_DEPTH_101010:
-			option = DITHER_OPTION_SPATIAL10;
+			option = DITHER_OPTION_TRUN10;
 			break;
 		default:
 			option = DITHER_OPTION_DISABLE;
@@ -4757,6 +4474,8 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
 			option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) {
 		fmt_bit_depth->flags.TRUNCATE_ENABLED = 1;
 		fmt_bit_depth->flags.TRUNCATE_DEPTH = 2;
+		if (option == DITHER_OPTION_TRUN10)
+			fmt_bit_depth->flags.TRUNCATE_MODE = 1;
 	}
 
 	/* special case - Formatter can only reduce by 4 bits at most.
@@ -5267,6 +4986,20 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc,
 	return DC_OK;
 }
 
+bool resource_subvp_in_use(struct dc *dc,
+		struct dc_state *context)
+{
+	uint32_t i;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE)
+			return true;
+	}
+	return false;
+}
+
 bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream)
 {
 	if (!dc->debug.disable_subvp_high_refresh && is_subvp_high_refresh_candidate(stream))
@@ -5274,7 +5007,7 @@ bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_st
 	if (dc->current_state->stream_count == 1 && stream->timing.v_addressable >= 2880 &&
 			((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120)
 		return true;
-	else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 2160 &&
+	else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 1080 &&
 			((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120)
 		return true;
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
new file mode 100644
index 000000000000..460a8010c79f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -0,0 +1,865 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "core_types.h"
+#include "core_status.h"
+#include "dc_state.h"
+#include "dc_state_priv.h"
+#include "dc_stream_priv.h"
+#include "dc_plane_priv.h"
+
+#include "dm_services.h"
+#include "resource.h"
+#include "link_enc_cfg.h"
+
+#include "dml2/dml2_wrapper.h"
+#include "dml2/dml2_internal_types.h"
+
+#define DC_LOGGER \
+	dc->ctx->logger
+#define DC_LOGGER_INIT(logger)
+
+/* Private dc_state helper functions */
+static bool dc_state_track_phantom_stream(struct dc_state *state,
+		struct dc_stream_state *phantom_stream)
+{
+	if (state->phantom_stream_count >= MAX_PHANTOM_PIPES)
+		return false;
+
+	state->phantom_streams[state->phantom_stream_count++] = phantom_stream;
+
+	return true;
+}
+
+static bool dc_state_untrack_phantom_stream(struct dc_state *state, struct dc_stream_state *phantom_stream)
+{
+	bool res = false;
+	int i;
+
+	/* first find phantom stream in the dc_state */
+	for (i = 0; i < state->phantom_stream_count; i++) {
+		if (state->phantom_streams[i] == phantom_stream) {
+			state->phantom_streams[i] = NULL;
+			res = true;
+			break;
+		}
+	}
+
+	/* failed to find stream in state */
+	if (!res)
+		return res;
+
+	/* trim back phantom streams */
+	state->phantom_stream_count--;
+	for (; i < state->phantom_stream_count; i++)
+		state->phantom_streams[i] = state->phantom_streams[i + 1];
+
+	return res;
+}
+
+static bool dc_state_is_phantom_stream_tracked(struct dc_state *state, struct dc_stream_state *phantom_stream)
+{
+	int i;
+
+	for (i = 0; i < state->phantom_stream_count; i++) {
+		if (state->phantom_streams[i] == phantom_stream)
+			return true;
+	}
+
+	return false;
+}
+
+static bool dc_state_track_phantom_plane(struct dc_state *state,
+		struct dc_plane_state *phantom_plane)
+{
+	if (state->phantom_plane_count >= MAX_PHANTOM_PIPES)
+		return false;
+
+	state->phantom_planes[state->phantom_plane_count++] = phantom_plane;
+
+	return true;
+}
+
+static bool dc_state_untrack_phantom_plane(struct dc_state *state, struct dc_plane_state *phantom_plane)
+{
+	bool res = false;
+	int i;
+
+	/* first find phantom plane in the dc_state */
+	for (i = 0; i < state->phantom_plane_count; i++) {
+		if (state->phantom_planes[i] == phantom_plane) {
+			state->phantom_planes[i] = NULL;
+			res = true;
+			break;
+		}
+	}
+
+	/* failed to find plane in state */
+	if (!res)
+		return res;
+
+	/* trim back phantom planes */
+	state->phantom_plane_count--;
+	for (; i < state->phantom_plane_count; i++)
+		state->phantom_planes[i] = state->phantom_planes[i + 1];
+
+	return res;
+}
+
+static bool dc_state_is_phantom_plane_tracked(struct dc_state *state, struct dc_plane_state *phantom_plane)
+{
+	int i;
+
+	for (i = 0; i < state->phantom_plane_count; i++) {
+		if (state->phantom_planes[i] == phantom_plane)
+			return true;
+	}
+
+	return false;
+}
+
+static void dc_state_copy_internal(struct dc_state *dst_state, struct dc_state *src_state)
+{
+	int i, j;
+
+	memcpy(dst_state, src_state, sizeof(struct dc_state));
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *cur_pipe = &dst_state->res_ctx.pipe_ctx[i];
+
+		if (cur_pipe->top_pipe)
+			cur_pipe->top_pipe =  &dst_state->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
+
+		if (cur_pipe->bottom_pipe)
+			cur_pipe->bottom_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
+
+		if (cur_pipe->prev_odm_pipe)
+			cur_pipe->prev_odm_pipe =  &dst_state->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
+
+		if (cur_pipe->next_odm_pipe)
+			cur_pipe->next_odm_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
+	}
+
+	/* retain phantoms */
+	for (i = 0; i < dst_state->phantom_stream_count; i++)
+		dc_stream_retain(dst_state->phantom_streams[i]);
+
+	for (i = 0; i < dst_state->phantom_plane_count; i++)
+		dc_plane_state_retain(dst_state->phantom_planes[i]);
+
+	/* retain streams and planes */
+	for (i = 0; i < dst_state->stream_count; i++) {
+		dc_stream_retain(dst_state->streams[i]);
+		for (j = 0; j < dst_state->stream_status[i].plane_count; j++)
+			dc_plane_state_retain(
+					dst_state->stream_status[i].plane_states[j]);
+	}
+
+}
+
+static void init_state(struct dc *dc, struct dc_state *state)
+{
+	/* Each context must have their own instance of VBA and in order to
+	 * initialize and obtain IP and SOC the base DML instance from DC is
+	 * initially copied into every context
+	 */
+	memcpy(&state->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
+}
+
+/* Public dc_state functions */
+struct dc_state *dc_state_create(struct dc *dc)
+{
+	struct dc_state *state = kvzalloc(sizeof(struct dc_state),
+			GFP_KERNEL);
+
+	if (!state)
+		return NULL;
+
+	init_state(dc, state);
+	dc_state_construct(dc, state);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	if (dc->debug.using_dml2)
+		dml2_create(dc, &dc->dml2_options, &state->bw_ctx.dml2);
+#endif
+
+	kref_init(&state->refcount);
+
+	return state;
+}
+
+void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state)
+{
+	struct kref refcount = dst_state->refcount;
+#ifdef CONFIG_DRM_AMD_DC_FP
+	struct dml2_context *dst_dml2 = dst_state->bw_ctx.dml2;
+#endif
+
+	dc_state_copy_internal(dst_state, src_state);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	dst_state->bw_ctx.dml2 = dst_dml2;
+	if (src_state->bw_ctx.dml2)
+		dml2_copy(dst_state->bw_ctx.dml2, src_state->bw_ctx.dml2);
+#endif
+
+	/* context refcount should not be overridden */
+	dst_state->refcount = refcount;
+}
+
+struct dc_state *dc_state_create_copy(struct dc_state *src_state)
+{
+	struct dc_state *new_state;
+
+	new_state = kvmalloc(sizeof(struct dc_state),
+			GFP_KERNEL);
+	if (!new_state)
+		return NULL;
+
+	dc_state_copy_internal(new_state, src_state);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	if (src_state->bw_ctx.dml2 &&
+			!dml2_create_copy(&new_state->bw_ctx.dml2, src_state->bw_ctx.dml2)) {
+		dc_state_release(new_state);
+		return NULL;
+	}
+#endif
+
+	kref_init(&new_state->refcount);
+
+	return new_state;
+}
+
+void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state)
+{
+	dc_state_copy(dst_state, dc->current_state);
+}
+
+struct dc_state *dc_state_create_current_copy(struct dc *dc)
+{
+	return dc_state_create_copy(dc->current_state);
+}
+
+void dc_state_construct(struct dc *dc, struct dc_state *state)
+{
+	state->clk_mgr = dc->clk_mgr;
+
+	/* Initialise DIG link encoder resource tracking variables. */
+	link_enc_cfg_init(dc, state);
+}
+
+void dc_state_destruct(struct dc_state *state)
+{
+	int i, j;
+
+	for (i = 0; i < state->stream_count; i++) {
+		for (j = 0; j < state->stream_status[i].plane_count; j++)
+			dc_plane_state_release(
+					state->stream_status[i].plane_states[j]);
+
+		state->stream_status[i].plane_count = 0;
+		dc_stream_release(state->streams[i]);
+		state->streams[i] = NULL;
+	}
+	state->stream_count = 0;
+
+	/* release tracked phantoms */
+	for (i = 0; i < state->phantom_stream_count; i++) {
+		dc_stream_release(state->phantom_streams[i]);
+		state->phantom_streams[i] = NULL;
+	}
+
+	for (i = 0; i < state->phantom_plane_count; i++) {
+		dc_plane_state_release(state->phantom_planes[i]);
+		state->phantom_planes[i] = NULL;
+	}
+	state->stream_mask = 0;
+	memset(&state->res_ctx, 0, sizeof(state->res_ctx));
+	memset(&state->pp_display_cfg, 0, sizeof(state->pp_display_cfg));
+	memset(&state->dcn_bw_vars, 0, sizeof(state->dcn_bw_vars));
+	state->clk_mgr = NULL;
+	memset(&state->bw_ctx.bw, 0, sizeof(state->bw_ctx.bw));
+	memset(state->block_sequence, 0, sizeof(state->block_sequence));
+	state->block_sequence_steps = 0;
+	memset(state->dc_dmub_cmd, 0, sizeof(state->dc_dmub_cmd));
+	state->dmub_cmd_count = 0;
+	memset(&state->perf_params, 0, sizeof(state->perf_params));
+	memset(&state->scratch, 0, sizeof(state->scratch));
+}
+
+void dc_state_retain(struct dc_state *state)
+{
+	kref_get(&state->refcount);
+}
+
+static void dc_state_free(struct kref *kref)
+{
+	struct dc_state *state = container_of(kref, struct dc_state, refcount);
+
+	dc_state_destruct(state);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	dml2_destroy(state->bw_ctx.dml2);
+	state->bw_ctx.dml2 = 0;
+#endif
+
+	kvfree(state);
+}
+
+void dc_state_release(struct dc_state *state)
+{
+	kref_put(&state->refcount, dc_state_free);
+}
+/*
+ * dc_state_add_stream() - Add a new dc_stream_state to a dc_state.
+ */
+enum dc_status dc_state_add_stream(
+		struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *stream)
+{
+	enum dc_status res;
+
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (state->stream_count >= dc->res_pool->timing_generator_count) {
+		DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream);
+		return DC_ERROR_UNEXPECTED;
+	}
+
+	state->streams[state->stream_count] = stream;
+	dc_stream_retain(stream);
+	state->stream_count++;
+
+	res = resource_add_otg_master_for_stream_output(
+			state, dc->res_pool, stream);
+	if (res != DC_OK)
+		DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res);
+
+	return res;
+}
+
+/*
+ * dc_state_remove_stream() - Remove a stream from a dc_state.
+ */
+enum dc_status dc_state_remove_stream(
+		struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *stream)
+{
+	int i;
+	struct pipe_ctx *del_pipe = resource_get_otg_master_for_stream(
+			&state->res_ctx, stream);
+
+	if (!del_pipe) {
+		dm_error("Pipe not found for stream %p !\n", stream);
+		return DC_ERROR_UNEXPECTED;
+	}
+
+	resource_update_pipes_for_stream_with_slice_count(state,
+			dc->current_state, dc->res_pool, stream, 1);
+	resource_remove_otg_master_for_stream_output(
+			state, dc->res_pool, stream);
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == stream)
+			break;
+
+	if (state->streams[i] != stream) {
+		dm_error("Context doesn't have stream %p !\n", stream);
+		return DC_ERROR_UNEXPECTED;
+	}
+
+	dc_stream_release(state->streams[i]);
+	state->stream_count--;
+
+	/* Trim back arrays */
+	for (; i < state->stream_count; i++) {
+		state->streams[i] = state->streams[i + 1];
+		state->stream_status[i] = state->stream_status[i + 1];
+	}
+
+	state->streams[state->stream_count] = NULL;
+	memset(
+			&state->stream_status[state->stream_count],
+			0,
+			sizeof(state->stream_status[0]));
+
+	return DC_OK;
+}
+
+bool dc_state_add_plane(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state *plane_state,
+		struct dc_state *state)
+{
+	struct resource_pool *pool = dc->res_pool;
+	struct pipe_ctx *otg_master_pipe;
+	struct dc_stream_status *stream_status = NULL;
+	bool added = false;
+
+	stream_status = dc_state_get_stream_status(state, stream);
+	if (stream_status == NULL) {
+		dm_error("Existing stream not found; failed to attach surface!\n");
+		goto out;
+	} else if (stream_status->plane_count == MAX_SURFACE_NUM) {
+		dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n",
+				plane_state, MAX_SURFACE_NUM);
+		goto out;
+	}
+
+	otg_master_pipe = resource_get_otg_master_for_stream(
+			&state->res_ctx, stream);
+	added = resource_append_dpp_pipes_for_plane_composition(state,
+			dc->current_state, pool, otg_master_pipe, plane_state);
+
+	if (added) {
+		stream_status->plane_states[stream_status->plane_count] =
+				plane_state;
+		stream_status->plane_count++;
+		dc_plane_state_retain(plane_state);
+	}
+
+out:
+	return added;
+}
+
+bool dc_state_remove_plane(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state *plane_state,
+		struct dc_state *state)
+{
+	int i;
+	struct dc_stream_status *stream_status = NULL;
+	struct resource_pool *pool = dc->res_pool;
+
+	if (!plane_state)
+		return true;
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == stream) {
+			stream_status = &state->stream_status[i];
+			break;
+		}
+
+	if (stream_status == NULL) {
+		dm_error("Existing stream not found; failed to remove plane.\n");
+		return false;
+	}
+
+	resource_remove_dpp_pipes_for_plane_composition(
+			state, pool, plane_state);
+
+	for (i = 0; i < stream_status->plane_count; i++) {
+		if (stream_status->plane_states[i] == plane_state) {
+			dc_plane_state_release(stream_status->plane_states[i]);
+			break;
+		}
+	}
+
+	if (i == stream_status->plane_count) {
+		dm_error("Existing plane_state not found; failed to detach it!\n");
+		return false;
+	}
+
+	stream_status->plane_count--;
+
+	/* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */
+	for (; i < stream_status->plane_count; i++)
+		stream_status->plane_states[i] = stream_status->plane_states[i + 1];
+
+	stream_status->plane_states[stream_status->plane_count] = NULL;
+
+	if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm)
+		/* ODM combine could prevent us from supporting more planes
+		 * we will reset ODM slice count back to 1 when all planes have
+		 * been removed to maximize the amount of planes supported when
+		 * new planes are added.
+		 */
+		resource_update_pipes_for_stream_with_slice_count(
+				state, dc->current_state, dc->res_pool, stream, 1);
+
+	return true;
+}
+
+/**
+ * dc_state_rem_all_planes_for_stream - Remove planes attached to the target stream.
+ *
+ * @dc: Current dc state.
+ * @stream: Target stream, which we want to remove the attached plans.
+ * @state: context from which the planes are to be removed.
+ *
+ * Return:
+ * Return true if DC was able to remove all planes from the target
+ * stream, otherwise, return false.
+ */
+bool dc_state_rem_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_state *state)
+{
+	int i, old_plane_count;
+	struct dc_stream_status *stream_status = NULL;
+	struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == stream) {
+			stream_status = &state->stream_status[i];
+			break;
+		}
+
+	if (stream_status == NULL) {
+		dm_error("Existing stream %p not found!\n", stream);
+		return false;
+	}
+
+	old_plane_count = stream_status->plane_count;
+
+	for (i = 0; i < old_plane_count; i++)
+		del_planes[i] = stream_status->plane_states[i];
+
+	for (i = 0; i < old_plane_count; i++)
+		if (!dc_state_remove_plane(dc, stream, del_planes[i], state))
+			return false;
+
+	return true;
+}
+
+bool dc_state_add_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state * const *plane_states,
+		int plane_count,
+		struct dc_state *state)
+{
+	int i;
+	bool result = true;
+
+	for (i = 0; i < plane_count; i++)
+		if (!dc_state_add_plane(dc, stream, plane_states[i], state)) {
+			result = false;
+			break;
+		}
+
+	return result;
+}
+
+/* Private dc_state functions */
+
+/**
+ * dc_state_get_stream_status - Get stream status from given dc state
+ * @state: DC state to find the stream status in
+ * @stream: The stream to get the stream status for
+ *
+ * The given stream is expected to exist in the given dc state. Otherwise, NULL
+ * will be returned.
+ */
+struct dc_stream_status *dc_state_get_stream_status(
+		struct dc_state *state,
+		struct dc_stream_state *stream)
+{
+	uint8_t i;
+
+	if (state == NULL)
+		return NULL;
+
+	for (i = 0; i < state->stream_count; i++) {
+		if (stream == state->streams[i])
+			return &state->stream_status[i];
+	}
+
+	return NULL;
+}
+
+enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state,
+		const struct pipe_ctx *pipe_ctx)
+{
+	return dc_state_get_stream_subvp_type(state, pipe_ctx->stream);
+}
+
+enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state,
+		const struct dc_stream_state *stream)
+{
+	int i;
+
+	enum mall_stream_type type = SUBVP_NONE;
+
+	for (i = 0; i < state->stream_count; i++) {
+		if (state->streams[i] == stream) {
+			type = state->stream_status[i].mall_stream_config.type;
+			break;
+		}
+	}
+
+	return type;
+}
+
+struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state,
+		const struct dc_stream_state *stream)
+{
+	int i;
+
+	struct dc_stream_state *paired_stream = NULL;
+
+	for (i = 0; i < state->stream_count; i++) {
+		if (state->streams[i] == stream) {
+			paired_stream = state->stream_status[i].mall_stream_config.paired_stream;
+			break;
+		}
+	}
+
+	return paired_stream;
+}
+
+struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *main_stream)
+{
+	struct dc_stream_state *phantom_stream;
+
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	phantom_stream = dc_create_stream_for_sink(main_stream->sink);
+
+	if (!phantom_stream) {
+		DC_LOG_ERROR("Failed to allocate phantom stream.\n");
+		return NULL;
+	}
+
+	/* track phantom stream in dc_state */
+	dc_state_track_phantom_stream(state, phantom_stream);
+
+	phantom_stream->is_phantom = true;
+	phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
+	phantom_stream->dpms_off = true;
+
+	return phantom_stream;
+}
+
+void dc_state_release_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream)
+{
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (!dc_state_untrack_phantom_stream(state, phantom_stream)) {
+		DC_LOG_ERROR("Failed to free phantom stream %p in dc state %p.\n", phantom_stream, state);
+		return;
+	}
+
+	dc_stream_release(phantom_stream);
+}
+
+struct dc_plane_state *dc_state_create_phantom_plane(struct dc *dc,
+		struct dc_state *state,
+		struct dc_plane_state *main_plane)
+{
+	struct dc_plane_state *phantom_plane = dc_create_plane_state(dc);
+
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (!phantom_plane) {
+		DC_LOG_ERROR("Failed to allocate phantom plane.\n");
+		return NULL;
+	}
+
+	/* track phantom inside dc_state */
+	dc_state_track_phantom_plane(state, phantom_plane);
+
+	phantom_plane->is_phantom = true;
+
+	return phantom_plane;
+}
+
+void dc_state_release_phantom_plane(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_plane_state *phantom_plane)
+{
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (!dc_state_untrack_phantom_plane(state, phantom_plane)) {
+		DC_LOG_ERROR("Failed to free phantom plane %p in dc state %p.\n", phantom_plane, state);
+		return;
+	}
+
+	dc_plane_state_release(phantom_plane);
+}
+
+/* add phantom streams to context and generate correct meta inside dc_state */
+enum dc_status dc_state_add_phantom_stream(struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream,
+		struct dc_stream_state *main_stream)
+{
+	struct dc_stream_status *main_stream_status;
+	struct dc_stream_status *phantom_stream_status;
+	enum dc_status res = dc_state_add_stream(dc, state, phantom_stream);
+
+	/* check if stream is tracked */
+	if (res == DC_OK && !dc_state_is_phantom_stream_tracked(state, phantom_stream)) {
+		/* stream must be tracked if added to state */
+		dc_state_track_phantom_stream(state, phantom_stream);
+	}
+
+	/* setup subvp meta */
+	main_stream_status = dc_state_get_stream_status(state, main_stream);
+	phantom_stream_status = dc_state_get_stream_status(state, phantom_stream);
+	phantom_stream_status->mall_stream_config.type = SUBVP_PHANTOM;
+	phantom_stream_status->mall_stream_config.paired_stream = main_stream;
+	main_stream_status->mall_stream_config.type = SUBVP_MAIN;
+	main_stream_status->mall_stream_config.paired_stream = phantom_stream;
+
+	return res;
+}
+
+enum dc_status dc_state_remove_phantom_stream(struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream)
+{
+	struct dc_stream_status *main_stream_status;
+	struct dc_stream_status *phantom_stream_status;
+
+	/* reset subvp meta */
+	phantom_stream_status = dc_state_get_stream_status(state, phantom_stream);
+	main_stream_status = dc_state_get_stream_status(state, phantom_stream_status->mall_stream_config.paired_stream);
+	phantom_stream_status->mall_stream_config.type = SUBVP_NONE;
+	phantom_stream_status->mall_stream_config.paired_stream = NULL;
+	if (main_stream_status) {
+		main_stream_status->mall_stream_config.type = SUBVP_NONE;
+		main_stream_status->mall_stream_config.paired_stream = NULL;
+	}
+
+	/* remove stream from state */
+	return dc_state_remove_stream(dc, state, phantom_stream);
+}
+
+bool dc_state_add_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state)
+{
+	bool res = dc_state_add_plane(dc, phantom_stream, phantom_plane, state);
+
+	/* check if stream is tracked */
+	if (res && !dc_state_is_phantom_plane_tracked(state, phantom_plane)) {
+		/* stream must be tracked if added to state */
+		dc_state_track_phantom_plane(state, phantom_plane);
+	}
+
+	return res;
+}
+
+bool dc_state_remove_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state)
+{
+	return dc_state_remove_plane(dc, phantom_stream, phantom_plane, state);
+}
+
+bool dc_state_rem_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_state *state,
+		bool should_release_planes)
+{
+	int i, old_plane_count;
+	struct dc_stream_status *stream_status = NULL;
+	struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == phantom_stream) {
+			stream_status = &state->stream_status[i];
+			break;
+		}
+
+	if (stream_status == NULL) {
+		dm_error("Existing stream %p not found!\n", phantom_stream);
+		return false;
+	}
+
+	old_plane_count = stream_status->plane_count;
+
+	for (i = 0; i < old_plane_count; i++)
+		del_planes[i] = stream_status->plane_states[i];
+
+	for (i = 0; i < old_plane_count; i++) {
+		if (!dc_state_remove_plane(dc, phantom_stream, del_planes[i], state))
+			return false;
+		if (should_release_planes)
+			dc_state_release_phantom_plane(dc, state, del_planes[i]);
+	}
+
+	return true;
+}
+
+bool dc_state_add_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state * const *phantom_planes,
+		int plane_count,
+		struct dc_state *state)
+{
+	return dc_state_add_all_planes_for_stream(dc, phantom_stream, phantom_planes, plane_count, state);
+}
+
+bool dc_state_remove_phantom_streams_and_planes(
+	struct dc *dc,
+	struct dc_state *state)
+{
+	int i;
+	bool removed_phantom = false;
+	struct dc_stream_state *phantom_stream = NULL;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+		if (pipe->plane_state && pipe->stream && dc_state_get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
+			phantom_stream = pipe->stream;
+
+			dc_state_rem_all_phantom_planes_for_stream(dc, phantom_stream, state, false);
+			dc_state_remove_phantom_stream(dc, state, phantom_stream);
+			removed_phantom = true;
+		}
+	}
+	return removed_phantom;
+}
+
+void dc_state_release_phantom_streams_and_planes(
+		struct dc *dc,
+		struct dc_state *state)
+{
+	int i;
+
+	for (i = 0; i < state->phantom_stream_count; i++)
+		dc_state_release_phantom_stream(dc, state, state->phantom_streams[i]);
+
+	for (i = 0; i < state->phantom_plane_count; i++)
+		dc_state_release_phantom_plane(dc, state, state->phantom_planes[i]);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 4bdf105d1d71..54670e0b1518 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -31,6 +31,8 @@
 #include "ipp.h"
 #include "timing_generator.h"
 #include "dc_dmub_srv.h"
+#include "dc_state_priv.h"
+#include "dc_stream_priv.h"
 
 #define DC_LOGGER dc->ctx->logger
 
@@ -54,7 +56,7 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink)
 	}
 }
 
-static bool dc_stream_construct(struct dc_stream_state *stream,
+bool dc_stream_construct(struct dc_stream_state *stream,
 	struct dc_sink *dc_sink_data)
 {
 	uint32_t i = 0;
@@ -121,13 +123,12 @@ static bool dc_stream_construct(struct dc_stream_state *stream,
 	}
 	stream->out_transfer_func->type = TF_TYPE_BYPASS;
 
-	stream->stream_id = stream->ctx->dc_stream_id_count;
-	stream->ctx->dc_stream_id_count++;
+	dc_stream_assign_stream_id(stream);
 
 	return true;
 }
 
-static void dc_stream_destruct(struct dc_stream_state *stream)
+void dc_stream_destruct(struct dc_stream_state *stream)
 {
 	dc_sink_release(stream->sink);
 	if (stream->out_transfer_func != NULL) {
@@ -136,6 +137,13 @@ static void dc_stream_destruct(struct dc_stream_state *stream)
 	}
 }
 
+void dc_stream_assign_stream_id(struct dc_stream_state *stream)
+{
+	/* MSB is reserved to indicate phantoms */
+	stream->stream_id = stream->ctx->dc_stream_id_count;
+	stream->ctx->dc_stream_id_count++;
+}
+
 void dc_stream_retain(struct dc_stream_state *stream)
 {
 	kref_get(&stream->refcount);
@@ -196,8 +204,7 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream)
 	if (new_stream->out_transfer_func)
 		dc_transfer_func_retain(new_stream->out_transfer_func);
 
-	new_stream->stream_id = new_stream->ctx->dc_stream_id_count;
-	new_stream->ctx->dc_stream_id_count++;
+	dc_stream_assign_stream_id(new_stream);
 
 	/* If using dynamic encoder assignment, wait till stream committed to assign encoder. */
 	if (new_stream->ctx->dc->res_pool->funcs->link_encs_assign)
@@ -209,31 +216,6 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream)
 }
 
 /**
- * dc_stream_get_status_from_state - Get stream status from given dc state
- * @state: DC state to find the stream status in
- * @stream: The stream to get the stream status for
- *
- * The given stream is expected to exist in the given dc state. Otherwise, NULL
- * will be returned.
- */
-struct dc_stream_status *dc_stream_get_status_from_state(
-	struct dc_state *state,
-	struct dc_stream_state *stream)
-{
-	uint8_t i;
-
-	if (state == NULL)
-		return NULL;
-
-	for (i = 0; i < state->stream_count; i++) {
-		if (stream == state->streams[i])
-			return &state->stream_status[i];
-	}
-
-	return NULL;
-}
-
-/**
  * dc_stream_get_status() - Get current stream status of the given stream state
  * @stream: The stream to get the stream status for.
  *
@@ -244,7 +226,7 @@ struct dc_stream_status *dc_stream_get_status(
 	struct dc_stream_state *stream)
 {
 	struct dc *dc = stream->ctx->dc;
-	return dc_stream_get_status_from_state(dc->current_state, stream);
+	return dc_state_get_stream_status(dc->current_state, stream);
 }
 
 static void program_cursor_attributes(
@@ -465,16 +447,37 @@ bool dc_stream_add_writeback(struct dc *dc,
 	if (dc->hwss.enable_writeback) {
 		struct dc_stream_status *stream_status = dc_stream_get_status(stream);
 		struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
-		dwb->otg_inst = stream_status->primary_otg_inst;
+		if (stream_status)
+			dwb->otg_inst = stream_status->primary_otg_inst;
 	}
+
+	if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
+		dm_error("DC: update_bandwidth failed!\n");
+		return false;
+	}
+
+	/* enable writeback */
+	if (dc->hwss.enable_writeback) {
+		struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
+
+		if (dwb->funcs->is_enabled(dwb)) {
+			/* writeback pipe already enabled, only need to update */
+			dc->hwss.update_writeback(dc, wb_info, dc->current_state);
+		} else {
+			/* Enable writeback pipe from scratch*/
+			dc->hwss.enable_writeback(dc, wb_info, dc->current_state);
+		}
+	}
+
 	return true;
 }
 
-bool dc_stream_remove_writeback(struct dc *dc,
+bool dc_stream_fc_disable_writeback(struct dc *dc,
 		struct dc_stream_state *stream,
 		uint32_t dwb_pipe_inst)
 {
-	int i = 0, j = 0;
+	struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst];
+
 	if (stream == NULL) {
 		dm_error("DC: dc_stream is NULL!\n");
 		return false;
@@ -490,27 +493,63 @@ bool dc_stream_remove_writeback(struct dc *dc,
 		return false;
 	}
 
-//	stream->writeback_info[dwb_pipe_inst].wb_enabled = false;
-	for (i = 0; i < stream->num_wb_info; i++) {
-		/*dynamic update*/
-		if (stream->writeback_info[i].wb_enabled &&
-			stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst) {
-			stream->writeback_info[i].wb_enabled = false;
-		}
+	if (dwb->funcs->set_fc_enable)
+		dwb->funcs->set_fc_enable(dwb, DWB_FRAME_CAPTURE_DISABLE);
+
+	return true;
+}
+
+bool dc_stream_remove_writeback(struct dc *dc,
+		struct dc_stream_state *stream,
+		uint32_t dwb_pipe_inst)
+{
+	int i = 0, j = 0;
+	if (stream == NULL) {
+		dm_error("DC: dc_stream is NULL!\n");
+		return false;
+	}
+
+	if (dwb_pipe_inst >= MAX_DWB_PIPES) {
+		dm_error("DC: writeback pipe is invalid!\n");
+		return false;
+	}
+
+	if (stream->num_wb_info > MAX_DWB_PIPES) {
+		dm_error("DC: num_wb_info is invalid!\n");
+		return false;
 	}
 
 	/* remove writeback info for disabled writeback pipes from stream */
 	for (i = 0, j = 0; i < stream->num_wb_info; i++) {
 		if (stream->writeback_info[i].wb_enabled) {
-			if (j < i)
-				/* trim the array */
+
+			if (stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst)
+				stream->writeback_info[i].wb_enabled = false;
+
+			/* trim the array */
+			if (j < i) {
 				memcpy(&stream->writeback_info[j], &stream->writeback_info[i],
 						sizeof(struct dc_writeback_info));
-			j++;
+				j++;
+			}
 		}
 	}
 	stream->num_wb_info = j;
 
+	/* recalculate and apply DML parameters */
+	if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
+		dm_error("DC: update_bandwidth failed!\n");
+		return false;
+	}
+
+	/* disable writeback */
+	if (dc->hwss.disable_writeback) {
+		struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst];
+
+		if (dwb->funcs->is_enabled(dwb))
+			dc->hwss.disable_writeback(dc, dwb_pipe_inst);
+	}
+
 	return true;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
index a80e45300783..19a2c7140ae8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
@@ -32,10 +32,12 @@
 #include "transform.h"
 #include "dpp.h"
 
+#include "dc_plane_priv.h"
+
 /*******************************************************************************
  * Private functions
  ******************************************************************************/
-static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
+void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
 {
 	plane_state->ctx = ctx;
 
@@ -63,7 +65,7 @@ static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *pl
 
 }
 
-static void dc_plane_destruct(struct dc_plane_state *plane_state)
+void dc_plane_destruct(struct dc_plane_state *plane_state)
 {
 	if (plane_state->gamma_correction != NULL) {
 		dc_gamma_release(&plane_state->gamma_correction);
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 9316b737a8ba..f30a341bc090 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -27,6 +27,8 @@
 #define DC_INTERFACE_H_
 
 #include "dc_types.h"
+#include "dc_state.h"
+#include "dc_plane.h"
 #include "grph_object_defs.h"
 #include "logger_types.h"
 #include "hdcp_msg_types.h"
@@ -49,7 +51,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.259"
+#define DC_VER "3.2.265"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
@@ -461,6 +463,12 @@ enum dml_hostvm_override_opts {
 	DML_HOSTVM_OVERRIDE_TRUE = 0x2,
 };
 
+enum dc_replay_power_opts {
+	replay_power_opt_invalid		= 0x0,
+	replay_power_opt_smu_opt_static_screen	= 0x1,
+	replay_power_opt_z10_static_screen	= 0x10,
+};
+
 enum dcc_option {
 	DCC_ENABLE = 0,
 	DCC_DISABLE = 1,
@@ -874,6 +882,7 @@ struct dc_debug_options {
 	unsigned int seamless_boot_odm_combine;
 	unsigned int force_odm_combine_4to1; //bit vector based on otg inst
 	int minimum_z8_residency_time;
+	int minimum_z10_residency_time;
 	bool disable_z9_mpc;
 	unsigned int force_fclk_khz;
 	bool enable_tri_buf;
@@ -955,7 +964,6 @@ struct dc_debug_options {
 	unsigned int min_prefetch_in_strobe_ns;
 	bool disable_unbounded_requesting;
 	bool dig_fifo_off_in_blank;
-	bool temp_mst_deallocation_sequence;
 	bool override_dispclk_programming;
 	bool otg_crc_db;
 	bool disallow_dispclk_dppclk_ds;
@@ -978,6 +986,9 @@ struct dc_debug_options {
 	bool psp_disabled_wa;
 	unsigned int ips2_eval_delay_us;
 	unsigned int ips2_entry_delay_us;
+	bool disable_timeout;
+	bool disable_extblankadj;
+	unsigned int static_screen_wait_frames;
 };
 
 struct gpu_info_soc_bounding_box_v1_0;
@@ -1025,7 +1036,6 @@ struct dc {
 
 	/* Require to optimize clocks and bandwidth for added/removed planes */
 	bool optimized_required;
-	bool wm_optimized_required;
 	bool idle_optimizations_allowed;
 	bool enable_c20_dtm_b0;
 
@@ -1388,13 +1398,6 @@ struct dc_surface_update {
 /*
  * Create a new surface with default parameters;
  */
-struct dc_plane_state *dc_create_plane_state(struct dc *dc);
-const struct dc_plane_status *dc_plane_get_status(
-		const struct dc_plane_state *plane_state);
-
-void dc_plane_state_retain(struct dc_plane_state *plane_state);
-void dc_plane_state_release(struct dc_plane_state *plane_state);
-
 void dc_gamma_retain(struct dc_gamma *dc_gamma);
 void dc_gamma_release(struct dc_gamma **dc_gamma);
 struct dc_gamma *dc_create_gamma(void);
@@ -1458,37 +1461,20 @@ enum dc_status dc_validate_global_state(
 		struct dc_state *new_ctx,
 		bool fast_validate);
 
-
-void dc_resource_state_construct(
-		const struct dc *dc,
-		struct dc_state *dst_ctx);
-
 bool dc_acquire_release_mpc_3dlut(
 		struct dc *dc, bool acquire,
 		struct dc_stream_state *stream,
 		struct dc_3dlut **lut,
 		struct dc_transfer_func **shaper);
 
-void dc_resource_state_copy_construct(
-		const struct dc_state *src_ctx,
-		struct dc_state *dst_ctx);
-
-void dc_resource_state_copy_construct_current(
-		const struct dc *dc,
-		struct dc_state *dst_ctx);
-
-void dc_resource_state_destruct(struct dc_state *context);
-
 bool dc_resource_is_dsc_encoding_supported(const struct dc *dc);
+void get_audio_check(struct audio_info *aud_modes,
+	struct audio_check *aud_chk);
 
 enum dc_status dc_commit_streams(struct dc *dc,
 				 struct dc_stream_state *streams[],
 				 uint8_t stream_count);
 
-struct dc_state *dc_create_state(struct dc *dc);
-struct dc_state *dc_copy_state(struct dc_state *src_ctx);
-void dc_retain_state(struct dc_state *context);
-void dc_release_state(struct dc_state *context);
 
 struct dc_plane_state *dc_get_surface_for_mpcc(struct dc *dc,
 		struct dc_stream_state *stream,
@@ -1540,7 +1526,13 @@ struct dc_link {
 	bool is_dig_mapping_flexible;
 	bool hpd_status; /* HPD status of link without physical HPD pin. */
 	bool is_hpd_pending; /* Indicates a new received hpd */
-	bool is_automated; /* Indicates automated testing */
+
+	/* USB4 DPIA links skip verifying link cap, instead performing the fallback method
+	 * for every link training. This is incompatible with DP LL compliance automation,
+	 * which expects the same link settings to be used every retry on a link loss.
+	 * This flag is used to skip the fallback when link loss occurs during automation.
+	 */
+	bool skip_fallback_on_link_loss;
 
 	bool edp_sink_present;
 
@@ -1608,7 +1600,6 @@ struct dc_link {
 	enum edp_revision edp_revision;
 	union dpcd_sink_ext_caps dpcd_sink_ext_caps;
 
-	struct backlight_settings backlight_settings;
 	struct psr_settings psr_settings;
 
 	struct replay_settings replay_settings;
@@ -2092,6 +2083,20 @@ bool dc_link_setup_psr(struct dc_link *dc_link,
 		const struct dc_stream_state *stream, struct psr_config *psr_config,
 		struct psr_context *psr_context);
 
+/*
+ * Communicate with DMUB to allow or disallow Panel Replay on the specified link:
+ *
+ * @link: pointer to the dc_link struct instance
+ * @enable: enable(active) or disable(inactive) replay
+ * @wait: state transition need to wait the active set completed.
+ * @force_static: force disable(inactive) the replay
+ * @power_opts: set power optimazation parameters to DMUB.
+ *
+ * return: allow Replay active will return true, else will return false.
+ */
+bool dc_link_set_replay_allow_active(struct dc_link *dc_link, const bool *enable,
+		bool wait, bool force_static, const unsigned int *power_opts);
+
 bool dc_link_get_replay_state(const struct dc_link *dc_link, uint64_t *state);
 
 /* On eDP links this function call will stall until T12 has elapsed.
@@ -2187,11 +2192,11 @@ int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link(
  *
  * @dc: pointer to dc struct
  * @stream: pointer to all possible streams
- * @num_streams: number of valid DPIA streams
+ * @count: number of valid DPIA streams
  *
  * return: TRUE if bw used by DPIAs doesn't exceed available BW else return FALSE
  */
-bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams,
+bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams,
 		const unsigned int count);
 
 /* Sink Interfaces - A sink corresponds to a display output device */
@@ -2336,6 +2341,9 @@ void dc_hardware_release(struct dc *dc);
 void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc);
 
 bool dc_set_psr_allow_active(struct dc *dc, bool enable);
+
+bool dc_set_replay_allow_active(struct dc *dc, bool active);
+
 void dc_z10_restore(const struct dc *dc);
 void dc_z10_save_init(struct dc *dc);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
index be9aa1a71847..26940d94d8fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
@@ -140,7 +140,7 @@ struct dc_vbios_funcs {
 	enum bp_result (*enable_lvtma_control)(
 		struct dc_bios *bios,
 		uint8_t uc_pwr_on,
-		uint8_t panel_instance,
+		uint8_t pwrseq_instance,
 		uint8_t bypass_panel_control_wait);
 
 	enum bp_result (*get_soc_bb_info)(
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 0e07699c1e83..2b79a0e5638e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -33,6 +33,7 @@
 #include "cursor_reg_cache.h"
 #include "resource.h"
 #include "clk_mgr.h"
+#include "dc_state_priv.h"
 
 #define CTX dc_dmub_srv->ctx
 #define DC_LOGGER CTX->logger
@@ -140,7 +141,10 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
 
 		if (status == DMUB_STATUS_QUEUE_FULL) {
 			/* Execute and wait for queue to become empty again. */
-			dmub_srv_cmd_execute(dmub);
+			status = dmub_srv_cmd_execute(dmub);
+			if (status == DMUB_STATUS_POWER_STATE_D3)
+				return false;
+
 			dmub_srv_wait_for_idle(dmub, 100000);
 
 			/* Requeue the command. */
@@ -148,16 +152,20 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
 		}
 
 		if (status != DMUB_STATUS_OK) {
-			DC_ERROR("Error queueing DMUB command: status=%d\n", status);
-			dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+			if (status != DMUB_STATUS_POWER_STATE_D3) {
+				DC_ERROR("Error queueing DMUB command: status=%d\n", status);
+				dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+			}
 			return false;
 		}
 	}
 
 	status = dmub_srv_cmd_execute(dmub);
 	if (status != DMUB_STATUS_OK) {
-		DC_ERROR("Error starting DMUB execution: status=%d\n", status);
-		dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+		if (status != DMUB_STATUS_POWER_STATE_D3) {
+			DC_ERROR("Error starting DMUB execution: status=%d\n", status);
+			dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+		}
 		return false;
 	}
 
@@ -218,7 +226,10 @@ bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int coun
 
 		if (status == DMUB_STATUS_QUEUE_FULL) {
 			/* Execute and wait for queue to become empty again. */
-			dmub_srv_cmd_execute(dmub);
+			status = dmub_srv_cmd_execute(dmub);
+			if (status == DMUB_STATUS_POWER_STATE_D3)
+				return false;
+
 			dmub_srv_wait_for_idle(dmub, 100000);
 
 			/* Requeue the command. */
@@ -226,22 +237,31 @@ bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int coun
 		}
 
 		if (status != DMUB_STATUS_OK) {
-			DC_ERROR("Error queueing DMUB command: status=%d\n", status);
-			dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+			if (status != DMUB_STATUS_POWER_STATE_D3) {
+				DC_ERROR("Error queueing DMUB command: status=%d\n", status);
+				dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+			}
 			return false;
 		}
 	}
 
 	status = dmub_srv_cmd_execute(dmub);
 	if (status != DMUB_STATUS_OK) {
-		DC_ERROR("Error starting DMUB execution: status=%d\n", status);
-		dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+		if (status != DMUB_STATUS_POWER_STATE_D3) {
+			DC_ERROR("Error starting DMUB execution: status=%d\n", status);
+			dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+		}
 		return false;
 	}
 
 	// Wait for DMUB to process command
 	if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) {
-		status = dmub_srv_wait_for_idle(dmub, 100000);
+		if (dc_dmub_srv->ctx->dc->debug.disable_timeout) {
+			do {
+				status = dmub_srv_wait_for_idle(dmub, 100000);
+			} while (status != DMUB_STATUS_OK);
+		} else
+			status = dmub_srv_wait_for_idle(dmub, 100000);
 
 		if (status != DMUB_STATUS_OK) {
 			DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status);
@@ -282,17 +302,11 @@ bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv)
 bool dc_dmub_srv_notify_stream_mask(struct dc_dmub_srv *dc_dmub_srv,
 				    unsigned int stream_mask)
 {
-	struct dmub_srv *dmub;
-	const uint32_t timeout = 30;
-
 	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
 		return false;
 
-	dmub = dc_dmub_srv->dmub;
-
-	return dmub_srv_send_gpint_command(
-		       dmub, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK,
-		       stream_mask, timeout) == DMUB_STATUS_OK;
+	return dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK,
+					 stream_mask, NULL, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 bool dc_dmub_srv_is_restore_required(struct dc_dmub_srv *dc_dmub_srv)
@@ -341,7 +355,7 @@ void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal
 	cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
 
 	// Send the command to the DMCUB.
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
@@ -355,7 +369,7 @@ void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
 	cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
 
 	// Send the command to the DMCUB.
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 static uint8_t dc_dmub_srv_get_pipes_for_stream(struct dc *dc, struct dc_stream_state *stream)
@@ -448,7 +462,7 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru
 		sizeof(cmd.fw_assisted_mclk_switch) - sizeof(cmd.fw_assisted_mclk_switch.header);
 
 	// Send the command to the DMCUB.
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -469,7 +483,7 @@ void dc_dmub_srv_query_caps_cmd(struct dc_dmub_srv *dc_dmub_srv)
 	cmd.query_feature_caps.header.payload_bytes = sizeof(struct dmub_cmd_query_feature_caps_data);
 
 	/* If command was processed, copy feature caps to dmub srv */
-	if (dm_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	if (dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
 	    cmd.query_feature_caps.header.ret_status == 0) {
 		memcpy(&dc_dmub_srv->dmub->feature_caps,
 		       &cmd.query_feature_caps.query_feature_caps_data,
@@ -494,7 +508,7 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
 	cmd.visual_confirm_color.visual_confirm_color_data.visual_confirm_color.panel_inst = panel_inst;
 
 	// If command was processed, copy feature caps to dmub srv
-	if (dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	if (dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
 		cmd.visual_confirm_color.header.ret_status == 0) {
 		memcpy(&dc->ctx->dmub_srv->dmub->visual_confirm_color,
 			&cmd.visual_confirm_color.visual_confirm_color_data,
@@ -505,10 +519,11 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
 /**
  * populate_subvp_cmd_drr_info - Helper to populate DRR pipe info for the DMCUB subvp command
  *
- * @dc: [in] current dc state
+ * @dc: [in] pointer to dc object
  * @subvp_pipe: [in] pipe_ctx for the SubVP pipe
  * @vblank_pipe: [in] pipe_ctx for the DRR pipe
  * @pipe_data: [in] Pipe data which stores the VBLANK/DRR info
+ * @context: [in] DC state for access to phantom stream
  *
  * Populate the DMCUB SubVP command with DRR pipe info. All the information
  * required for calculating the SubVP + DRR microschedule is populated here.
@@ -519,12 +534,14 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
  * 3. Populate the drr_info with the min and max supported vtotal values
  */
 static void populate_subvp_cmd_drr_info(struct dc *dc,
+		struct dc_state *context,
 		struct pipe_ctx *subvp_pipe,
 		struct pipe_ctx *vblank_pipe,
 		struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data)
 {
+	struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
 	struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
-	struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+	struct dc_crtc_timing *phantom_timing = &phantom_stream->timing;
 	struct dc_crtc_timing *drr_timing = &vblank_pipe->stream->timing;
 	uint16_t drr_frame_us = 0;
 	uint16_t min_drr_supported_us = 0;
@@ -612,7 +629,7 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc,
 			continue;
 
 		// Find the SubVP pipe
-		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
 			break;
 	}
 
@@ -629,7 +646,7 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc,
 
 	if (vblank_pipe->stream->ignore_msa_timing_param &&
 		(vblank_pipe->stream->allow_freesync || vblank_pipe->stream->vrr_active_variable || vblank_pipe->stream->vrr_active_fixed))
-		populate_subvp_cmd_drr_info(dc, pipe, vblank_pipe, pipe_data);
+		populate_subvp_cmd_drr_info(dc, context, pipe, vblank_pipe, pipe_data);
 }
 
 /**
@@ -654,10 +671,17 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc,
 	uint32_t subvp0_prefetch_us = 0;
 	uint32_t subvp1_prefetch_us = 0;
 	uint32_t prefetch_delta_us = 0;
-	struct dc_crtc_timing *phantom_timing0 = &subvp_pipes[0]->stream->mall_stream_config.paired_stream->timing;
-	struct dc_crtc_timing *phantom_timing1 = &subvp_pipes[1]->stream->mall_stream_config.paired_stream->timing;
+	struct dc_stream_state *phantom_stream0 = NULL;
+	struct dc_stream_state *phantom_stream1 = NULL;
+	struct dc_crtc_timing *phantom_timing0 = NULL;
+	struct dc_crtc_timing *phantom_timing1 = NULL;
 	struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL;
 
+	phantom_stream0 = dc_state_get_paired_subvp_stream(context, subvp_pipes[0]->stream);
+	phantom_stream1 = dc_state_get_paired_subvp_stream(context, subvp_pipes[1]->stream);
+	phantom_timing0 = &phantom_stream0->timing;
+	phantom_timing1 = &phantom_stream1->timing;
+
 	subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) *
 			(uint64_t)phantom_timing0->h_total * 1000000),
 			(((uint64_t)phantom_timing0->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
@@ -707,8 +731,9 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
 	uint32_t j;
 	struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data =
 			&cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index];
+	struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
 	struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
-	struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+	struct dc_crtc_timing *phantom_timing = &phantom_stream->timing;
 	uint32_t out_num_stream, out_den_stream, out_num_plane, out_den_plane, out_num, out_den;
 
 	pipe_data->mode = SUBVP;
@@ -762,7 +787,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
 	for (j = 0; j < dc->res_pool->pipe_count; j++) {
 		struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j];
 
-		if (phantom_pipe->stream == subvp_pipe->stream->mall_stream_config.paired_stream) {
+		if (phantom_pipe->stream == dc_state_get_paired_subvp_stream(context, subvp_pipe->stream)) {
 			pipe_data->pipe_config.subvp_data.phantom_pipe_index = phantom_pipe->stream_res.tg->inst;
 			if (phantom_pipe->bottom_pipe) {
 				pipe_data->pipe_config.subvp_data.phantom_split_pipe_index = phantom_pipe->bottom_pipe->plane_res.hubp->inst;
@@ -796,6 +821,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 	union dmub_rb_cmd cmd;
 	struct pipe_ctx *subvp_pipes[2];
 	uint32_t wm_val_refclk = 0;
+	enum mall_stream_type pipe_mall_type;
 
 	memset(&cmd, 0, sizeof(cmd));
 	// FW command for SUBVP
@@ -811,7 +837,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 		 */
 		if (resource_is_pipe_type(pipe, OTG_MASTER) &&
 				resource_is_pipe_type(pipe, DPP_PIPE) &&
-				pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
 			subvp_pipes[subvp_count++] = pipe;
 	}
 
@@ -819,6 +845,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 		// For each pipe that is a "main" SUBVP pipe, fill in pipe data for DMUB SUBVP cmd
 		for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+			pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 			if (!pipe->stream)
 				continue;
@@ -829,12 +856,11 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 			 */
 			if (resource_is_pipe_type(pipe, OTG_MASTER) &&
 					resource_is_pipe_type(pipe, DPP_PIPE) &&
-					pipe->stream->mall_stream_config.paired_stream &&
-					pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+					pipe_mall_type == SUBVP_MAIN) {
 				populate_subvp_cmd_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++);
 			} else if (resource_is_pipe_type(pipe, OTG_MASTER) &&
 					resource_is_pipe_type(pipe, DPP_PIPE) &&
-					pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+					pipe_mall_type == SUBVP_NONE) {
 				// Don't need to check for ActiveDRAMClockChangeMargin < 0, not valid in cases where
 				// we run through DML without calculating "natural" P-state support
 				populate_subvp_cmd_vblank_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++);
@@ -856,7 +882,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 		cmd.fw_assisted_mclk_switch_v2.config_data.watermark_a_cache = wm_val_refclk < 0xFFFF ? wm_val_refclk : 0xFFFF;
 	}
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *diag_data)
@@ -1093,7 +1119,7 @@ void dc_send_update_cursor_info_to_dmu(
 				pipe_idx, pCtx->plane_res.hubp, pCtx->plane_res.dpp);
 
 		/* Combine 2nd cmds update_curosr_info to DMU */
-		dm_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT);
+		dc_wake_and_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT);
 	}
 }
 
@@ -1107,25 +1133,20 @@ bool dc_dmub_check_min_version(struct dmub_srv *srv)
 void dc_dmub_srv_enable_dpia_trace(const struct dc *dc)
 {
 	struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
-	struct dmub_srv *dmub;
-	enum dmub_status status;
-	static const uint32_t timeout_us = 30;
 
 	if (!dc_dmub_srv || !dc_dmub_srv->dmub) {
 		DC_LOG_ERROR("%s: invalid parameters.", __func__);
 		return;
 	}
 
-	dmub = dc_dmub_srv->dmub;
-
-	status = dmub_srv_send_gpint_command(dmub, DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1, 0x0010, timeout_us);
-	if (status != DMUB_STATUS_OK) {
+	if (!dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1,
+				       0x0010, NULL, DM_DMUB_WAIT_TYPE_WAIT)) {
 		DC_LOG_ERROR("timeout updating trace buffer mask word\n");
 		return;
 	}
 
-	status = dmub_srv_send_gpint_command(dmub, DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK, 0x0000, timeout_us);
-	if (status != DMUB_STATUS_OK) {
+	if (!dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK,
+				       0x0000, NULL, DM_DMUB_WAIT_TYPE_WAIT)) {
 		DC_LOG_ERROR("timeout updating trace buffer mask word\n");
 		return;
 	}
@@ -1143,14 +1164,23 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
 	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
 	enum dmub_status status;
 
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return true;
+
 	if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation)
 		return true;
 
 	if (wait) {
-		status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000);
-		if (status != DMUB_STATUS_OK) {
-			DC_ERROR("Error querying DMUB hw power up status: error=%d\n", status);
-			return false;
+		if (dc_dmub_srv->ctx->dc->debug.disable_timeout) {
+			do {
+				status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000);
+			} while (status != DMUB_STATUS_OK);
+		} else {
+			status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000);
+			if (status != DMUB_STATUS_OK) {
+				DC_ERROR("Error querying DMUB hw power up status: error=%d\n", status);
+				return false;
+			}
 		}
 	} else
 		return dmub_srv_is_hw_pwr_up(dc_dmub_srv->dmub);
@@ -1158,7 +1188,7 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
 	return true;
 }
 
-void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
+static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
 {
 	union dmub_rb_cmd cmd = {0};
 
@@ -1179,20 +1209,20 @@ void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
 			dc->hwss.set_idle_state(dc, true);
 	}
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	/* NOTE: This does not use the "wake" interface since this is part of the wake path. */
+	/* We also do not perform a wait since DMCUB could enter idle after the notification. */
+	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 }
 
-void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
+static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 {
-	const uint32_t max_num_polls = 10000;
 	uint32_t allow_state = 0;
 	uint32_t commit_state = 0;
-	uint32_t i;
 
 	if (dc->debug.dmcub_emulation)
 		return;
 
-	if (!dc->idle_optimizations_allowed)
+	if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub)
 		return;
 
 	if (dc->hwss.get_idle_state &&
@@ -1204,8 +1234,16 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 
 		if (!(allow_state & DMUB_IPS2_ALLOW_MASK)) {
 			// Wait for evaluation time
-			udelay(dc->debug.ips2_eval_delay_us);
-			commit_state = dc->hwss.get_idle_state(dc);
+			for (;;) {
+				udelay(dc->debug.ips2_eval_delay_us);
+				commit_state = dc->hwss.get_idle_state(dc);
+				if (commit_state & DMUB_IPS2_ALLOW_MASK)
+					break;
+
+				/* allow was still set, retry eval delay */
+				dc->hwss.set_idle_state(dc, false);
+			}
+
 			if (!(commit_state & DMUB_IPS2_COMMIT_MASK)) {
 				// Tell PMFW to exit low power state
 				dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
@@ -1214,14 +1252,13 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 				udelay(dc->debug.ips2_entry_delay_us);
 				dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
 
-				for (i = 0; i < max_num_polls; ++i) {
+				for (;;) {
 					commit_state = dc->hwss.get_idle_state(dc);
 					if (commit_state & DMUB_IPS2_COMMIT_MASK)
 						break;
 
 					udelay(1);
 				}
-				ASSERT(i < max_num_polls);
 
 				if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true))
 					ASSERT(0);
@@ -1236,14 +1273,13 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 
 		dc_dmub_srv_notify_idle(dc, false);
 		if (!(allow_state & DMUB_IPS1_ALLOW_MASK)) {
-			for (i = 0; i < max_num_polls; ++i) {
+			for (;;) {
 				commit_state = dc->hwss.get_idle_state(dc);
 				if (commit_state & DMUB_IPS1_COMMIT_MASK)
 					break;
 
 				udelay(1);
 			}
-			ASSERT(i < max_num_polls);
 		}
 	}
 
@@ -1251,3 +1287,131 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 		ASSERT(0);
 }
 
+void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState)
+{
+	struct dmub_srv *dmub;
+
+	if (!dc_dmub_srv)
+		return;
+
+	dmub = dc_dmub_srv->dmub;
+
+	if (powerState == DC_ACPI_CM_POWER_STATE_D0)
+		dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D0);
+	else
+		dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D3);
+}
+
+void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle)
+{
+	struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
+
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return;
+
+	if (dc_dmub_srv->idle_allowed == allow_idle)
+		return;
+
+	/*
+	 * Entering a low power state requires a driver notification.
+	 * Powering up the hardware requires notifying PMFW and DMCUB.
+	 * Clearing the driver idle allow requires a DMCUB command.
+	 * DMCUB commands requires the DMCUB to be powered up and restored.
+	 *
+	 * Exit out early to prevent an infinite loop of DMCUB commands
+	 * triggering exit low power - use software state to track this.
+	 */
+	dc_dmub_srv->idle_allowed = allow_idle;
+
+	if (!allow_idle)
+		dc_dmub_srv_exit_low_power_state(dc);
+	else
+		dc_dmub_srv_notify_idle(dc, allow_idle);
+}
+
+bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd,
+				  enum dm_dmub_wait_type wait_type)
+{
+	return dc_wake_and_execute_dmub_cmd_list(ctx, 1, cmd, wait_type);
+}
+
+bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count,
+				       union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type)
+{
+	struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv;
+	bool result = false, reallow_idle = false;
+
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return false;
+
+	if (count == 0)
+		return true;
+
+	if (dc_dmub_srv->idle_allowed) {
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false);
+		reallow_idle = true;
+	}
+
+	/*
+	 * These may have different implementations in DM, so ensure
+	 * that we guide it to the expected helper.
+	 */
+	if (count > 1)
+		result = dm_execute_dmub_cmd_list(ctx, count, cmd, wait_type);
+	else
+		result = dm_execute_dmub_cmd(ctx, cmd, wait_type);
+
+	if (result && reallow_idle)
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true);
+
+	return result;
+}
+
+static bool dc_dmub_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+				  uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type)
+{
+	struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv;
+	const uint32_t wait_us = wait_type == DM_DMUB_WAIT_TYPE_NO_WAIT ? 0 : 30;
+	enum dmub_status status;
+
+	if (response)
+		*response = 0;
+
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return false;
+
+	status = dmub_srv_send_gpint_command(dc_dmub_srv->dmub, command_code, param, wait_us);
+	if (status != DMUB_STATUS_OK) {
+		if (status == DMUB_STATUS_TIMEOUT && wait_type == DM_DMUB_WAIT_TYPE_NO_WAIT)
+			return true;
+
+		return false;
+	}
+
+	if (response && wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)
+		dmub_srv_get_gpint_response(dc_dmub_srv->dmub, response);
+
+	return true;
+}
+
+bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+			       uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type)
+{
+	struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv;
+	bool result = false, reallow_idle = false;
+
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return false;
+
+	if (dc_dmub_srv->idle_allowed) {
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false);
+		reallow_idle = true;
+	}
+
+	result = dc_dmub_execute_gpint(ctx, command_code, param, response, wait_type);
+
+	if (result && reallow_idle)
+		dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true);
+
+	return result;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
index d4a60f53faab..952bfb368886 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -50,6 +50,8 @@ struct dc_dmub_srv {
 
 	struct dc_context *ctx;
 	void *dm;
+
+	bool idle_allowed;
 };
 
 void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv);
@@ -100,6 +102,59 @@ void dc_dmub_srv_enable_dpia_trace(const struct dc *dc);
 void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, const struct dc_plane_address *addr, uint8_t subvp_index);
 
 bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait);
-void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle);
-void dc_dmub_srv_exit_low_power_state(const struct dc *dc);
+
+void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle);
+
+void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState);
+
+/**
+ * dc_wake_and_execute_dmub_cmd() - Wrapper for DMUB command execution.
+ *
+ * Refer to dc_wake_and_execute_dmub_cmd_list() for usage and limitations,
+ * This function is a convenience wrapper for a single command execution.
+ *
+ * @ctx: DC context
+ * @cmd: The command to send/receive
+ * @wait_type: The wait behavior for the execution
+ *
+ * Return: true on command submission success, false otherwise
+ */
+bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd,
+				  enum dm_dmub_wait_type wait_type);
+
+/**
+ * dc_wake_and_execute_dmub_cmd_list() - Wrapper for DMUB command list execution.
+ *
+ * If the DMCUB hardware was asleep then it wakes the DMUB before
+ * executing the command and attempts to re-enter if the command
+ * submission was successful.
+ *
+ * This should be the preferred command submission interface provided
+ * the DC lock is acquired.
+ *
+ * Entry/exit out of idle power optimizations would need to be
+ * manually performed otherwise through dc_allow_idle_optimizations().
+ *
+ * @ctx: DC context
+ * @count: Number of commands to send/receive
+ * @cmd: Array of commands to send
+ * @wait_type: The wait behavior for the execution
+ *
+ * Return: true on command submission success, false otherwise
+ */
+bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count,
+				       union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type);
+
+/**
+ * dc_wake_and_execute_gpint()
+ *
+ * @ctx: DC context
+ * @command_code: The command ID to send to DMCUB
+ * @param: The parameter to message DMCUB
+ * @response: Optional response out value - may be NULL.
+ * @wait_type: The wait behavior for the execution
+ */
+bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+			       uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type);
+
 #endif /* _DMUB_DC_SRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
index eeeeeef4d717..1cb7765f593a 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
@@ -1377,6 +1377,12 @@ struct dp_trace {
 #ifndef DP_TUNNELING_STATUS
 #define DP_TUNNELING_STATUS				0xE0025 /* 1.4a */
 #endif
+#ifndef DP_TUNNELING_MAX_LINK_RATE
+#define DP_TUNNELING_MAX_LINK_RATE			0xE0028 /* 1.4a */
+#endif
+#ifndef DP_TUNNELING_MAX_LANE_COUNT
+#define DP_TUNNELING_MAX_LANE_COUNT			0xE0029 /* 1.4a */
+#endif
 #ifndef DPTX_BW_ALLOCATION_MODE_CONTROL
 #define DPTX_BW_ALLOCATION_MODE_CONTROL			0xE0030 /* 1.4a */
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index cb6eaddab720..8f9a67825615 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -50,7 +50,7 @@ static inline void submit_dmub_read_modify_write(
 	cmd_buf->header.payload_bytes =
 			sizeof(struct dmub_cmd_read_modify_write_sequence) * offload->reg_seq_count;
 
-	dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 	memset(cmd_buf, 0, sizeof(*cmd_buf));
 
@@ -67,7 +67,7 @@ static inline void submit_dmub_burst_write(
 	cmd_buf->header.payload_bytes =
 			sizeof(uint32_t) * offload->reg_seq_count;
 
-	dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 	memset(cmd_buf, 0, sizeof(*cmd_buf));
 
@@ -80,7 +80,7 @@ static inline void submit_dmub_reg_wait(
 {
 	struct dmub_rb_cmd_reg_wait *cmd_buf = &offload->cmd_data.reg_wait;
 
-	dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 	memset(cmd_buf, 0, sizeof(*cmd_buf));
 	offload->reg_seq_count = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index 9649934ea186..811474f4419b 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -244,7 +244,7 @@ enum pixel_format {
 #define DC_MAX_DIRTY_RECTS 3
 struct dc_flip_addrs {
 	struct dc_plane_address address;
-	unsigned int flip_timestamp_in_us;
+	unsigned long long flip_timestamp_in_us;
 	bool flip_immediate;
 	/* TODO: add flip duration for FreeSync */
 	bool triplebuffer_flips;
@@ -465,6 +465,7 @@ struct dc_cursor_mi_param {
 	struct fixed31_32 v_scale_ratio;
 	enum dc_rotation_angle rotation;
 	bool mirror;
+	struct dc_stream_state *stream;
 };
 
 /* IPP related types */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane.h b/drivers/gpu/drm/amd/display/dc/dc_plane.h
new file mode 100644
index 000000000000..ef380cae816a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_plane.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_PLANE_H_
+#define _DC_PLANE_H_
+
+#include "dc.h"
+#include "dc_hw_types.h"
+
+struct dc_plane_state *dc_create_plane_state(struct dc *dc);
+const struct dc_plane_status *dc_plane_get_status(
+		const struct dc_plane_state *plane_state);
+void dc_plane_state_retain(struct dc_plane_state *plane_state);
+void dc_plane_state_release(struct dc_plane_state *plane_state);
+
+#endif /* _DC_PLANE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h
new file mode 100644
index 000000000000..9ee184c1df00
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_PLANE_PRIV_H_
+#define _DC_PLANE_PRIV_H_
+
+#include "dc_plane.h"
+
+void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state);
+void dc_plane_destruct(struct dc_plane_state *plane_state);
+
+#endif /* _DC_PLANE_PRIV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_state.h b/drivers/gpu/drm/amd/display/dc/dc_state.h
new file mode 100644
index 000000000000..d167fdbfa8a9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_state.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STATE_H_
+#define _DC_STATE_H_
+
+#include "dc.h"
+#include "inc/core_status.h"
+
+struct dc_state *dc_state_create(struct dc *dc);
+void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state);
+struct dc_state *dc_state_create_copy(struct dc_state *src_state);
+void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state);
+struct dc_state *dc_state_create_current_copy(struct dc *dc);
+void dc_state_construct(struct dc *dc, struct dc_state *state);
+void dc_state_destruct(struct dc_state *state);
+void dc_state_retain(struct dc_state *state);
+void dc_state_release(struct dc_state *state);
+
+enum dc_status dc_state_add_stream(struct dc *dc,
+				    struct dc_state *state,
+				    struct dc_stream_state *stream);
+
+enum dc_status dc_state_remove_stream(
+		struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *stream);
+
+bool dc_state_add_plane(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state *plane_state,
+		struct dc_state *state);
+
+bool dc_state_remove_plane(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state *plane_state,
+		struct dc_state *state);
+
+bool dc_state_rem_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_state *state);
+
+bool dc_state_add_all_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *stream,
+		struct dc_plane_state * const *plane_states,
+		int plane_count,
+		struct dc_state *state);
+
+struct dc_stream_status *dc_state_get_stream_status(
+	struct dc_state *state,
+	struct dc_stream_state *stream);
+#endif /* _DC_STATE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h
new file mode 100644
index 000000000000..c1f44e09a6c1
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STATE_PRIV_H_
+#define _DC_STATE_PRIV_H_
+
+#include "dc_state.h"
+#include "dc_stream.h"
+
+/* Get the type of the provided resource (none, phantom, main) based on the provided
+ * context. If the context is unavailable, determine only if phantom or not.
+ */
+enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state,
+		const struct pipe_ctx *pipe_ctx);
+enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state,
+		const struct dc_stream_state *stream);
+
+/* Gets the phantom stream if main is provided, gets the main if phantom is provided.*/
+struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state,
+		const struct dc_stream_state *stream);
+
+/* allocate's phantom stream or plane and returns pointer to the object */
+struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *main_stream);
+struct dc_plane_state *dc_state_create_phantom_plane(struct dc *dc,
+		struct dc_state *state,
+		struct dc_plane_state *main_plane);
+
+/* deallocate's phantom stream or plane */
+void dc_state_release_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream);
+void dc_state_release_phantom_plane(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_plane_state *phantom_plane);
+
+/* add/remove phantom stream to context and generate subvp meta data */
+enum dc_status dc_state_add_phantom_stream(struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream,
+		struct dc_stream_state *main_stream);
+enum dc_status dc_state_remove_phantom_stream(struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream);
+
+bool dc_state_add_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state);
+
+bool dc_state_remove_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state);
+
+bool dc_state_rem_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_state *state,
+		bool should_release_planes);
+
+bool dc_state_add_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state * const *phantom_planes,
+		int plane_count,
+		struct dc_state *state);
+
+bool dc_state_remove_phantom_streams_and_planes(
+		struct dc *dc,
+		struct dc_state *state);
+
+void dc_state_release_phantom_streams_and_planes(
+		struct dc *dc,
+		struct dc_state *state);
+
+#endif /* _DC_STATE_PRIV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index e61eea6db29c..a23eebd9933b 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -38,6 +38,14 @@ struct timing_sync_info {
 	bool master;
 };
 
+struct mall_stream_config {
+	/* MALL stream config to indicate if the stream is phantom or not.
+	 * We will use a phantom stream to indicate that the pipe is phantom.
+	 */
+	enum mall_stream_type type;
+	struct dc_stream_state *paired_stream;	// master / slave stream
+};
+
 struct dc_stream_status {
 	int primary_otg_inst;
 	int stream_enc_inst;
@@ -50,6 +58,7 @@ struct dc_stream_status {
 	struct timing_sync_info timing_sync_info;
 	struct dc_plane_state *plane_states[MAX_SURFACE_NUM];
 	bool is_abm_supported;
+	struct mall_stream_config mall_stream_config;
 };
 
 enum hubp_dmdata_mode {
@@ -130,7 +139,6 @@ union stream_update_flags {
 		uint32_t wb_update:1;
 		uint32_t dsc_changed : 1;
 		uint32_t mst_bw : 1;
-		uint32_t crtc_timing_adjust : 1;
 		uint32_t fams_changed : 1;
 	} bits;
 
@@ -147,31 +155,6 @@ struct test_pattern {
 
 #define SUBVP_DRR_MARGIN_US 100 // 100us for DRR margin (SubVP + DRR)
 
-enum mall_stream_type {
-	SUBVP_NONE, // subvp not in use
-	SUBVP_MAIN, // subvp in use, this stream is main stream
-	SUBVP_PHANTOM, // subvp in use, this stream is a phantom stream
-};
-
-struct mall_stream_config {
-	/* MALL stream config to indicate if the stream is phantom or not.
-	 * We will use a phantom stream to indicate that the pipe is phantom.
-	 */
-	enum mall_stream_type type;
-	struct dc_stream_state *paired_stream;	// master / slave stream
-};
-
-/* Temp struct used to save and restore MALL config
- * during validation.
- *
- * TODO: Move MALL config into dc_state instead of stream struct
- * to avoid needing to save/restore.
- */
-struct mall_temp_config {
-	struct mall_stream_config mall_stream_config[MAX_PIPES];
-	bool is_phantom_plane[MAX_PIPES];
-};
-
 struct dc_stream_debug_options {
 	char force_odm_combine_segments;
 };
@@ -301,7 +284,7 @@ struct dc_stream_state {
 	bool has_non_synchronizable_pclk;
 	bool vblank_synchronized;
 	bool fpo_in_use;
-	struct mall_stream_config mall_stream_config;
+	bool is_phantom;
 };
 
 #define ABM_LEVEL_IMMEDIATE_DISABLE 255
@@ -342,7 +325,6 @@ struct dc_stream_update {
 	struct dc_3dlut *lut3d_func;
 
 	struct test_pattern *pending_test_pattern;
-	struct dc_crtc_timing_adjust *crtc_timing_adjust;
 };
 
 bool dc_is_stream_unchanged(
@@ -415,45 +397,14 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream,
 				  uint32_t *h_position,
 				  uint32_t *v_position);
 
-enum dc_status dc_add_stream_to_ctx(
-			struct dc *dc,
-		struct dc_state *new_ctx,
-		struct dc_stream_state *stream);
-
-enum dc_status dc_remove_stream_from_ctx(
-		struct dc *dc,
-			struct dc_state *new_ctx,
-			struct dc_stream_state *stream);
-
-
-bool dc_add_plane_to_context(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state *plane_state,
-		struct dc_state *context);
-
-bool dc_remove_plane_from_context(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state *plane_state,
-		struct dc_state *context);
-
-bool dc_rem_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_state *context);
-
-bool dc_add_all_planes_for_stream(
-		const struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_plane_state * const *plane_states,
-		int plane_count,
-		struct dc_state *context);
-
 bool dc_stream_add_writeback(struct dc *dc,
 		struct dc_stream_state *stream,
 		struct dc_writeback_info *wb_info);
 
+bool dc_stream_fc_disable_writeback(struct dc *dc,
+		struct dc_stream_state *stream,
+		uint32_t dwb_pipe_inst);
+
 bool dc_stream_remove_writeback(struct dc *dc,
 		struct dc_stream_state *stream,
 		uint32_t dwb_pipe_inst);
@@ -514,9 +465,6 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink);
 void dc_stream_retain(struct dc_stream_state *dc_stream);
 void dc_stream_release(struct dc_stream_state *dc_stream);
 
-struct dc_stream_status *dc_stream_get_status_from_state(
-	struct dc_state *state,
-	struct dc_stream_state *stream);
 struct dc_stream_status *dc_stream_get_status(
 	struct dc_stream_state *dc_stream);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h
new file mode 100644
index 000000000000..7476fd52ce2b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STREAM_PRIV_H_
+#define _DC_STREAM_PRIV_H_
+
+#include "dc_stream.h"
+
+bool dc_stream_construct(struct dc_stream_state *stream,
+	struct dc_sink *dc_sink_data);
+void dc_stream_destruct(struct dc_stream_state *stream);
+
+void dc_stream_assign_stream_id(struct dc_stream_state *stream);
+
+#endif // _DC_STREAM_PRIV_H_
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index fcb825e4f1bb..4f276169e05a 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -991,10 +991,6 @@ struct link_mst_stream_allocation_table {
 	struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM];
 };
 
-struct backlight_settings {
-	uint32_t backlight_millinits;
-};
-
 /* PSR feature flags */
 struct psr_settings {
 	bool psr_feature_enabled;		// PSR is supported by sink
@@ -1022,6 +1018,24 @@ enum replay_coasting_vtotal_type {
 	PR_COASTING_TYPE_NUM,
 };
 
+enum replay_link_off_frame_count_level {
+	PR_LINK_OFF_FRAME_COUNT_FAIL = 0x0,
+	PR_LINK_OFF_FRAME_COUNT_GOOD = 0x2,
+	PR_LINK_OFF_FRAME_COUNT_BEST = 0x6,
+};
+
+/*
+ * This is general Interface for Replay to
+ * set an 32 bit variable to dmub
+ * The Message_type indicates which variable
+ * passed to DMUB.
+ */
+enum replay_FW_Message_type {
+	Replay_Msg_Not_Support = -1,
+	Replay_Set_Timing_Sync_Supported,
+	Replay_Set_Residency_Frameupdate_Timer,
+};
+
 union replay_error_status {
 	struct {
 		unsigned char STATE_TRANSITION_ERROR    :1;
@@ -1033,26 +1047,48 @@ union replay_error_status {
 };
 
 struct replay_config {
-	bool replay_supported;                          // Replay feature is supported
-	unsigned int replay_power_opt_supported;        // Power opt flags that are supported
-	bool replay_smu_opt_supported;                  // SMU optimization is supported
-	unsigned int replay_enable_option;              // Replay enablement option
-	uint32_t debug_flags;                           // Replay debug flags
-	bool replay_timing_sync_supported; // Replay desync is supported
-	bool force_disable_desync_error_check;             // Replay desync is supported
-	bool received_desync_error_hpd; //Replay Received Desync Error HPD.
-	union replay_error_status replay_error_status; // Replay error status
-};
-
-/* Replay feature flags */
+	/* Replay feature is supported */
+	bool replay_supported;
+	/* Power opt flags that are supported */
+	unsigned int replay_power_opt_supported;
+	/* SMU optimization is supported */
+	bool replay_smu_opt_supported;
+	/* Replay enablement option */
+	unsigned int replay_enable_option;
+	/* Replay debug flags */
+	uint32_t debug_flags;
+	/* Replay sync is supported */
+	bool replay_timing_sync_supported;
+	/* Replay Disable desync error check. */
+	bool force_disable_desync_error_check;
+	/* Replay Received Desync Error HPD. */
+	bool received_desync_error_hpd;
+	/* Replay feature is supported long vblank */
+	bool replay_support_fast_resync_in_ultra_sleep_mode;
+	/* Replay error status */
+	union replay_error_status replay_error_status;
+};
+
+/* Replay feature flags*/
 struct replay_settings {
-	struct replay_config config;            // Replay configuration
-	bool replay_feature_enabled;            // Replay feature is ready for activating
-	bool replay_allow_active;               // Replay is currently active
-	unsigned int replay_power_opt_active;   // Power opt flags that are activated currently
-	bool replay_smu_opt_enable;             // SMU optimization is enabled
-	uint16_t coasting_vtotal;               // Current Coasting vtotal
-	uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM]; // Coasting vtotal table
+	/* Replay configuration */
+	struct replay_config config;
+	/* Replay feature is ready for activating */
+	bool replay_feature_enabled;
+	/* Replay is currently active */
+	bool replay_allow_active;
+	/* Replay is currently active */
+	bool replay_allow_long_vblank;
+	/* Power opt flags that are activated currently */
+	unsigned int replay_power_opt_active;
+	/* SMU optimization is enabled */
+	bool replay_smu_opt_enable;
+	/* Current Coasting vtotal */
+	uint16_t coasting_vtotal;
+	/* Coasting vtotal table */
+	uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM];
+	/* Maximum link off frame count */
+	enum replay_link_off_frame_count_level link_off_frame_count_level;
 };
 
 /* To split out "global" and "per-panel" config settings.
@@ -1115,6 +1151,8 @@ struct dc_dpia_bw_alloc {
 	int bw_granularity;    // BW Granularity
 	bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3:  DP-Tx & Dpia & CM
 	bool response_ready;   // Response ready from the CM side
+	uint8_t nrd_max_lane_count; // Non-reduced max lane count
+	uint8_t nrd_max_link_rate; // Non-reduced max link rate
 };
 
 #define MAX_SINKS_PER_LINK 4
@@ -1125,4 +1163,9 @@ enum dc_hpd_enable_select {
 	HPD_EN_FOR_SECONDARY_EDP_ONLY,
 };
 
+enum mall_stream_type {
+	SUBVP_NONE, // subvp not in use
+	SUBVP_MAIN, // subvp in use, this stream is main stream
+	SUBVP_PHANTOM, // subvp in use, this stream is a phantom stream
+};
 #endif /* DC_TYPES_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index 874b132fe1d7..a6006776333d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -135,7 +135,7 @@ static void dmcu_set_backlight_level(
 			0, 1, 80000);
 }
 
-static void dce_abm_init(struct abm *abm, uint32_t backlight)
+static void dce_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level)
 {
 	struct dce_abm *abm_dce = TO_DCE_ABM(abm);
 
@@ -162,7 +162,7 @@ static void dce_abm_init(struct abm *abm, uint32_t backlight)
 			BL1_PWM_TARGET_ABM_LEVEL, backlight);
 
 	REG_UPDATE(BL1_PWM_USER_LEVEL,
-			BL1_PWM_USER_LEVEL, backlight);
+			BL1_PWM_USER_LEVEL, user_level);
 
 	REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES,
 			ABM1_LS_MIN_PIXEL_VALUE_THRES, 0,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
index d3e6544022b7..ccc154b0281c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
@@ -57,18 +57,22 @@ static unsigned int abm_feature_support(struct abm *abm, unsigned int panel_inst
 	return ret;
 }
 
-static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight)
+static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight, uint32_t user_level)
 {
-	dmub_abm_init(abm, backlight);
+	dmub_abm_init(abm, backlight, user_level);
 }
 
 static unsigned int dmub_abm_get_current_backlight_ex(struct abm *abm)
 {
+	dc_allow_idle_optimizations(abm->ctx->dc, false);
+
 	return dmub_abm_get_current_backlight(abm);
 }
 
 static unsigned int dmub_abm_get_target_backlight_ex(struct abm *abm)
 {
+	dc_allow_idle_optimizations(abm->ctx->dc, false);
+
 	return dmub_abm_get_target_backlight(abm);
 }
 
@@ -145,7 +149,11 @@ static bool dmub_abm_save_restore_ex(
 	return ret;
 }
 
-static bool dmub_abm_set_pipe_ex(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst)
+static bool dmub_abm_set_pipe_ex(struct abm *abm,
+		uint32_t otg_inst,
+		uint32_t option,
+		uint32_t panel_inst,
+		uint32_t pwrseq_inst)
 {
 	bool ret = false;
 	unsigned int feature_support;
@@ -153,7 +161,7 @@ static bool dmub_abm_set_pipe_ex(struct abm *abm, uint32_t otg_inst, uint32_t op
 	feature_support = abm_feature_support(abm, panel_inst);
 
 	if (feature_support == ABM_LCD_SUPPORT)
-		ret = dmub_abm_set_pipe(abm, otg_inst, option, panel_inst);
+		ret = dmub_abm_set_pipe(abm, otg_inst, option, panel_inst, pwrseq_inst);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
index 592a8f7a1c6d..f9d6a181164a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
@@ -76,10 +76,10 @@ static void dmub_abm_enable_fractional_pwm(struct dc_context *dc)
 	cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.panel_mask = panel_mask;
 	cmd.abm_set_pwm_frac.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pwm_frac_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
-void dmub_abm_init(struct abm *abm, uint32_t backlight)
+void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level)
 {
 	struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
 
@@ -106,7 +106,7 @@ void dmub_abm_init(struct abm *abm, uint32_t backlight)
 			BL1_PWM_TARGET_ABM_LEVEL, backlight);
 
 	REG_UPDATE(BL1_PWM_USER_LEVEL,
-			BL1_PWM_USER_LEVEL, backlight);
+			BL1_PWM_USER_LEVEL, user_level);
 
 	REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES,
 			ABM1_LS_MIN_PIXEL_VALUE_THRES, 0,
@@ -155,7 +155,7 @@ bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask)
 	cmd.abm_set_level.abm_set_level_data.panel_mask = panel_mask;
 	cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_level_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -186,7 +186,7 @@ void dmub_abm_init_config(struct abm *abm,
 
 	cmd.abm_init_config.header.payload_bytes = sizeof(struct dmub_cmd_abm_init_config_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 }
 
@@ -203,7 +203,7 @@ bool dmub_abm_set_pause(struct abm *abm, bool pause, unsigned int panel_inst, un
 	cmd.abm_pause.abm_pause_data.panel_mask = panel_mask;
 	cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_pause_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -246,7 +246,7 @@ bool dmub_abm_save_restore(
 
 	cmd.abm_save_restore.header.payload_bytes = sizeof(struct dmub_rb_cmd_abm_save_restore);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	// Copy iramtable data into local structure
 	memcpy((void *)pData, dc->dmub_srv->dmub->scratch_mem_fb.cpu_addr, bytes);
@@ -254,7 +254,11 @@ bool dmub_abm_save_restore(
 	return true;
 }
 
-bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst)
+bool dmub_abm_set_pipe(struct abm *abm,
+		uint32_t otg_inst,
+		uint32_t option,
+		uint32_t panel_inst,
+		uint32_t pwrseq_inst)
 {
 	union dmub_rb_cmd cmd;
 	struct dc_context *dc = abm->ctx;
@@ -264,12 +268,13 @@ bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint
 	cmd.abm_set_pipe.header.type = DMUB_CMD__ABM;
 	cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE;
 	cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst;
+	cmd.abm_set_pipe.abm_set_pipe_data.pwrseq_inst = pwrseq_inst;
 	cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option;
 	cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst;
 	cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary;
 	cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -291,7 +296,7 @@ bool dmub_abm_set_backlight_level(struct abm *abm,
 	cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst);
 	cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
index 853564d7f471..761685e5b8c9 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
@@ -30,7 +30,7 @@
 
 struct abm_save_restore;
 
-void dmub_abm_init(struct abm *abm, uint32_t backlight);
+void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level);
 bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask);
 unsigned int dmub_abm_get_current_backlight(struct abm *abm);
 unsigned int dmub_abm_get_target_backlight(struct abm *abm);
@@ -44,7 +44,7 @@ bool dmub_abm_save_restore(
 		struct dc_context *dc,
 		unsigned int panel_inst,
 		struct abm_save_restore *pData);
-bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst);
+bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst);
 bool dmub_abm_set_backlight_level(struct abm *abm,
 		unsigned int backlight_pwm_u16_16,
 		unsigned int frame_ramp,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
index 2aa0e01a6891..ba1fec3016d5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
@@ -47,7 +47,7 @@ void dmub_hw_lock_mgr_cmd(struct dc_dmub_srv *dmub_srv,
 	if (!lock)
 		cmd.lock_hw.lock_hw_data.should_release = 1;
 
-	dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
index d8009b2dc56a..98a778996e1a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
@@ -48,5 +48,5 @@ void dmub_enable_outbox_notification(struct dc_dmub_srv *dmub_srv)
 		sizeof(cmd.outbox1_enable.header);
 	cmd.outbox1_enable.enable = true;
 
-	dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
index 9d4170a356a2..3e243e407bb8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
@@ -105,23 +105,18 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state)
  */
 static void dmub_psr_get_state(struct dmub_psr *dmub, enum dc_psr_state *state, uint8_t panel_inst)
 {
-	struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub;
 	uint32_t raw_state = 0;
 	uint32_t retry_count = 0;
-	enum dmub_status status;
 
 	do {
 		// Send gpint command and wait for ack
-		status = dmub_srv_send_gpint_command(srv, DMUB_GPINT__GET_PSR_STATE, panel_inst, 30);
-
-		if (status == DMUB_STATUS_OK) {
-			// GPINT was executed, get response
-			dmub_srv_get_gpint_response(srv, &raw_state);
+		if (dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__GET_PSR_STATE, panel_inst, &raw_state,
+					      DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
 			*state = convert_psr_state(raw_state);
-		} else
+		} else {
 			// Return invalid state when GPINT times out
 			*state = PSR_STATE_INVALID;
-
+		}
 	} while (++retry_count <= 1000 && *state == PSR_STATE_INVALID);
 
 	// Assert if max retry hit
@@ -171,7 +166,7 @@ static bool dmub_psr_set_version(struct dmub_psr *dmub, struct dc_stream_state *
 	cmd.psr_set_version.psr_set_version_data.panel_inst = panel_inst;
 	cmd.psr_set_version.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_version_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -199,7 +194,7 @@ static void dmub_psr_enable(struct dmub_psr *dmub, bool enable, bool wait, uint8
 
 	cmd.psr_enable.header.payload_bytes = 0; // Send header only
 
-	dm_execute_dmub_cmd(dc->dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	/* Below loops 1000 x 500us = 500 ms.
 	 *  Exit PSR may need to wait 1-2 frames to power up. Timeout after at
@@ -248,7 +243,7 @@ static void dmub_psr_set_level(struct dmub_psr *dmub, uint16_t psr_level, uint8_
 	cmd.psr_set_level.psr_set_level_data.psr_level = psr_level;
 	cmd.psr_set_level.psr_set_level_data.cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1;
 	cmd.psr_set_level.psr_set_level_data.panel_inst = panel_inst;
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 /*
@@ -267,7 +262,7 @@ static void dmub_psr_set_sink_vtotal_in_psr_active(struct dmub_psr *dmub,
 	cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_idle = psr_vtotal_idle;
 	cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_su = psr_vtotal_su;
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 /*
@@ -286,7 +281,7 @@ static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt
 	cmd.psr_set_power_opt.psr_set_power_opt_data.power_opt = power_opt;
 	cmd.psr_set_power_opt.psr_set_power_opt_data.panel_inst = panel_inst;
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 /*
@@ -423,7 +418,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
 		copy_settings_data->relock_delay_frame_cnt = 2;
 	copy_settings_data->dsc_slice_height = psr_context->dsc_slice_height;
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -444,7 +439,7 @@ static void dmub_psr_force_static(struct dmub_psr *dmub, uint8_t panel_inst)
 	cmd.psr_force_static.header.sub_type = DMUB_CMD__PSR_FORCE_STATIC;
 	cmd.psr_enable.header.payload_bytes = 0;
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 /*
@@ -452,13 +447,11 @@ static void dmub_psr_force_static(struct dmub_psr *dmub, uint8_t panel_inst)
  */
 static void dmub_psr_get_residency(struct dmub_psr *dmub, uint32_t *residency, uint8_t panel_inst)
 {
-	struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub;
 	uint16_t param = (uint16_t)(panel_inst << 8);
 
 	/* Send gpint command and wait for ack */
-	dmub_srv_send_gpint_command(srv, DMUB_GPINT__PSR_RESIDENCY, param, 30);
-
-	dmub_srv_get_gpint_response(srv, residency);
+	dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__PSR_RESIDENCY, param, residency,
+				  DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
 }
 
 static const struct dmub_psr_funcs psr_funcs = {
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
index 28149e53c2a6..38e4797e9476 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
@@ -258,13 +258,97 @@ static void dmub_replay_residency(struct dmub_replay *dmub, uint8_t panel_inst,
 		*residency = 0;
 }
 
+/**
+ * Set REPLAY power optimization flags and coasting vtotal.
+ */
+static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dmub,
+		unsigned int power_opt, uint8_t panel_inst, uint16_t coasting_vtotal)
+{
+	union dmub_rb_cmd cmd;
+	struct dc_context *dc = dmub->ctx;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.replay_set_power_opt_and_coasting_vtotal.header.type = DMUB_CMD__REPLAY;
+	cmd.replay_set_power_opt_and_coasting_vtotal.header.sub_type =
+		DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL;
+	cmd.replay_set_power_opt_and_coasting_vtotal.header.payload_bytes =
+		sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal);
+	cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_power_opt_data.power_opt = power_opt;
+	cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_power_opt_data.panel_inst = panel_inst;
+	cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_coasting_vtotal_data.coasting_vtotal = coasting_vtotal;
+
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/**
+ * send Replay general cmd to DMUB.
+ */
+static void dmub_replay_send_cmd(struct dmub_replay *dmub,
+		enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element)
+{
+	union dmub_rb_cmd cmd;
+	struct dc_context *ctx = NULL;
+
+	if (dmub == NULL || cmd_element == NULL)
+		return;
+
+	ctx = dmub->ctx;
+	if (ctx != NULL) {
+
+		if (msg != Replay_Msg_Not_Support) {
+			memset(&cmd, 0, sizeof(cmd));
+			//Header
+			cmd.replay_set_timing_sync.header.type = DMUB_CMD__REPLAY;
+		} else
+			return;
+	} else
+		return;
+
+	switch (msg) {
+	case Replay_Set_Timing_Sync_Supported:
+		//Header
+		cmd.replay_set_timing_sync.header.sub_type =
+			DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED;
+		cmd.replay_set_timing_sync.header.payload_bytes =
+			sizeof(struct dmub_rb_cmd_replay_set_timing_sync);
+		//Cmd Body
+		cmd.replay_set_timing_sync.replay_set_timing_sync_data.panel_inst =
+						cmd_element->sync_data.panel_inst;
+		cmd.replay_set_timing_sync.replay_set_timing_sync_data.timing_sync_supported =
+						cmd_element->sync_data.timing_sync_supported;
+		break;
+	case Replay_Set_Residency_Frameupdate_Timer:
+		//Header
+		cmd.replay_set_frameupdate_timer.header.sub_type =
+			DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER;
+		cmd.replay_set_frameupdate_timer.header.payload_bytes =
+			sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer);
+		//Cmd Body
+		cmd.replay_set_frameupdate_timer.data.panel_inst =
+						cmd_element->panel_inst;
+		cmd.replay_set_frameupdate_timer.data.enable =
+						cmd_element->timer_data.enable;
+		cmd.replay_set_frameupdate_timer.data.frameupdate_count =
+						cmd_element->timer_data.frameupdate_count;
+		break;
+	case Replay_Msg_Not_Support:
+	default:
+		return;
+		break;
+	}
+
+	dc_wake_and_execute_dmub_cmd(ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
 static const struct dmub_replay_funcs replay_funcs = {
-	.replay_copy_settings		= dmub_replay_copy_settings,
-	.replay_enable			= dmub_replay_enable,
-	.replay_get_state		= dmub_replay_get_state,
-	.replay_set_power_opt		= dmub_replay_set_power_opt,
-	.replay_set_coasting_vtotal	= dmub_replay_set_coasting_vtotal,
-	.replay_residency		= dmub_replay_residency,
+	.replay_copy_settings				= dmub_replay_copy_settings,
+	.replay_enable					= dmub_replay_enable,
+	.replay_get_state				= dmub_replay_get_state,
+	.replay_set_power_opt				= dmub_replay_set_power_opt,
+	.replay_set_coasting_vtotal			= dmub_replay_set_coasting_vtotal,
+	.replay_residency				= dmub_replay_residency,
+	.replay_set_power_opt_and_coasting_vtotal	= dmub_replay_set_power_opt_and_coasting_vtotal,
+	.replay_send_cmd				= dmub_replay_send_cmd,
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
index e8385bbf51fc..3613aff994d7 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
@@ -45,10 +45,14 @@ struct dmub_replay_funcs {
 		struct replay_context *replay_context, uint8_t panel_inst);
 	void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt,
 		uint8_t panel_inst);
+	void (*replay_send_cmd)(struct dmub_replay *dmub,
+		enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element);
 	void (*replay_set_coasting_vtotal)(struct dmub_replay *dmub, uint16_t coasting_vtotal,
 		uint8_t panel_inst);
 	void (*replay_residency)(struct dmub_replay *dmub,
 		uint8_t panel_inst, uint32_t *residency, const bool is_start, const bool is_alpm);
+	void (*replay_set_power_opt_and_coasting_vtotal)(struct dmub_replay *dmub,
+		unsigned int power_opt, uint8_t panel_inst, uint16_t coasting_vtotal);
 };
 
 struct dmub_replay *dmub_replay_create(struct dc_context *ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/Makefile b/drivers/gpu/drm/amd/display/dc/dce100/Makefile
deleted file mode 100644
index 0d2f6bbf7558..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dce100/Makefile
+++ /dev/null
@@ -1,46 +0,0 @@
-#
-# Copyright 2017 Advanced Micro Devices, Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-#
-# Makefile for the 'controller' sub-component of DAL.
-# It provides the control and status of HW CRTC block.
-
-CFLAGS_$(AMDDALPATH)/dc/dce100/dce100_resource.o = $(call cc-disable-warning, override-init)
-
-DCE100 = dce100_resource.o
-
-AMD_DAL_DCE100 = $(addprefix $(AMDDALPATH)/dc/dce100/,$(DCE100))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCE100)
-
-
-###############################################################################
-# DCE 10x
-###############################################################################
-ifdef 0#CONFIG_DRM_AMD_DC_DCE11_0
-TG_DCE100 = dce100_resource.o
-
-AMD_DAL_TG_DCE100 = $(addprefix \
-	$(AMDDALPATH)/dc/dce100/,$(TG_DCE100))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_TG_DCE100)
-endif
-
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/Makefile b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
index 695a50ed5ad2..f0777d61c2cb 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
@@ -26,8 +26,8 @@
 CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = $(call cc-disable-warning, override-init)
 
 DCE110 = dce110_timing_generator.o \
-dce110_compressor.o dce110_resource.o \
-dce110_opp_regamma_v.o dce110_opp_csc_v.o dce110_timing_generator_v.o \
+dce110_compressor.o dce110_opp_regamma_v.o \
+dce110_opp_csc_v.o dce110_timing_generator_v.o \
 dce110_mem_input_v.o dce110_opp_v.o dce110_transform_v.o
 
 AMD_DAL_DCE110 = $(addprefix $(AMDDALPATH)/dc/dce110/,$(DCE110))
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/Makefile b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
index e846ef58cab3..7e92effec894 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
@@ -25,8 +25,7 @@
 
 CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = $(call cc-disable-warning, override-init)
 
-DCE112 = dce112_compressor.o \
-dce112_resource.o
+DCE112 = dce112_compressor.o
 
 AMD_DAL_DCE112 = $(addprefix $(AMDDALPATH)/dc/dce112/,$(DCE112))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/Makefile b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
index 097cf407a15d..1e3ef68a452a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
@@ -26,7 +26,7 @@
 
 CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = $(call cc-disable-warning, override-init)
 
-DCE120 = dce120_resource.o dce120_timing_generator.o \
+DCE120 = dce120_timing_generator.o
 
 AMD_DAL_DCE120 = $(addprefix $(AMDDALPATH)/dc/dce120/,$(DCE120))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
index 93dd68c31275..7eefffbdc925 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
@@ -25,8 +25,7 @@
 
 CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = $(call cc-disable-warning, override-init)
 
-DCE80 = dce80_timing_generator.o \
-	dce80_resource.o
+DCE80 = dce80_timing_generator.o
 
 AMD_DAL_DCE80 = $(addprefix $(AMDDALPATH)/dc/dce80/,$(DCE80))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index 2d2007c3e2b6..ae6a131be71b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
@@ -22,9 +22,9 @@
 #
 # Makefile for DCN.
 
-DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o \
+DCN10 = dcn10_ipp.o \
 		dcn10_hw_sequencer_debug.o \
-		dcn10_dpp.o dcn10_opp.o dcn10_optc.o \
+		dcn10_dpp.o dcn10_opp.o \
 		dcn10_hubp.o dcn10_mpc.o \
 		dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \
 		dcn10_hubbub.o dcn10_stream_encoder.o dcn10_link_encoder.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
index 92fdab731f4a..9033b39e0e0c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
@@ -32,7 +32,7 @@
 #include "dce/dce_hwseq.h"
 #include "abm.h"
 #include "dmcu.h"
-#include "dcn10_optc.h"
+#include "dcn10/dcn10_optc.h"
 #include "dcn10/dcn10_dpp.h"
 #include "dcn10/dcn10_mpc.h"
 #include "timing_generator.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index d7dc9696a8c8..3dae3943b056 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -2,13 +2,11 @@
 #
 # Makefile for DCN.
 
-DCN20 = dcn20_resource.o dcn20_init.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
-		dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_optc.o dcn20_mmhubbub.o \
+DCN20 = dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
+		dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_mmhubbub.o \
 		dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \
 		dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o
 
-DCN20 += dcn20_dsc.o
-
 AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN20)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
index ab6d09c6fe34..ef5c22f41563 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
@@ -291,7 +291,43 @@
 	type SYMCLKB_FE_SRC_SEL;\
 	type SYMCLKC_FE_SRC_SEL;\
 	type SYMCLKD_FE_SRC_SEL;\
-	type SYMCLKE_FE_SRC_SEL;
+	type SYMCLKE_FE_SRC_SEL;\
+	type DTBCLK_P0_GATE_DISABLE;\
+	type DTBCLK_P1_GATE_DISABLE;\
+	type DTBCLK_P2_GATE_DISABLE;\
+	type DTBCLK_P3_GATE_DISABLE;\
+	type DSCCLK0_ROOT_GATE_DISABLE;\
+	type DSCCLK1_ROOT_GATE_DISABLE;\
+	type DSCCLK2_ROOT_GATE_DISABLE;\
+	type DSCCLK3_ROOT_GATE_DISABLE;\
+	type SYMCLKA_FE_ROOT_GATE_DISABLE;\
+	type SYMCLKB_FE_ROOT_GATE_DISABLE;\
+	type SYMCLKC_FE_ROOT_GATE_DISABLE;\
+	type SYMCLKD_FE_ROOT_GATE_DISABLE;\
+	type SYMCLKE_FE_ROOT_GATE_DISABLE;\
+	type DPPCLK0_ROOT_GATE_DISABLE;\
+	type DPPCLK1_ROOT_GATE_DISABLE;\
+	type DPPCLK2_ROOT_GATE_DISABLE;\
+	type DPPCLK3_ROOT_GATE_DISABLE;\
+	type HDMISTREAMCLK0_ROOT_GATE_DISABLE;\
+	type SYMCLKA_ROOT_GATE_DISABLE;\
+	type SYMCLKB_ROOT_GATE_DISABLE;\
+	type SYMCLKC_ROOT_GATE_DISABLE;\
+	type SYMCLKD_ROOT_GATE_DISABLE;\
+	type SYMCLKE_ROOT_GATE_DISABLE;\
+	type PHYA_REFCLK_ROOT_GATE_DISABLE;\
+	type PHYB_REFCLK_ROOT_GATE_DISABLE;\
+	type PHYC_REFCLK_ROOT_GATE_DISABLE;\
+	type PHYD_REFCLK_ROOT_GATE_DISABLE;\
+	type PHYE_REFCLK_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK0_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK1_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK2_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK3_ROOT_GATE_DISABLE;\
+	type DPSTREAMCLK0_GATE_DISABLE;\
+	type DPSTREAMCLK1_GATE_DISABLE;\
+	type DPSTREAMCLK2_GATE_DISABLE;\
+	type DPSTREAMCLK3_GATE_DISABLE;\
 
 struct dccg_shift {
 	DCCG_REG_FIELD_LIST(uint8_t)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
index 139cf31d2e45..89c3bf0fe0c9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
@@ -1077,8 +1077,16 @@ void hubp2_cursor_set_position(
 	if (src_y_offset < 0)
 		src_y_offset = 0;
 	/* Save necessary cursor info x, y position. w, h is saved in attribute func. */
-	hubp->cur_rect.x = src_x_offset + param->viewport.x;
-	hubp->cur_rect.y = src_y_offset + param->viewport.y;
+	if (param->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 &&
+	    param->rotation != ROTATION_ANGLE_0) {
+		hubp->cur_rect.x = 0;
+		hubp->cur_rect.y = 0;
+		hubp->cur_rect.w = param->stream->timing.h_addressable;
+		hubp->cur_rect.h = param->stream->timing.v_addressable;
+	} else {
+		hubp->cur_rect.x = src_x_offset + param->viewport.x;
+		hubp->cur_rect.y = src_y_offset + param->viewport.y;
+	}
 }
 
 void hubp2_clk_cntl(struct hubp *hubp, bool enable)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
index 3a41a97b0729..2b0b4f32e13b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
@@ -1,9 +1,8 @@
 # SPDX-License-Identifier: MIT
 #
 # Makefile for DCN.
-DCN201 = dcn201_init.o dcn201_resource.o \
-	dcn201_hubbub.o\
-	dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_optc.o dcn201_dpp.o \
+DCN201 = dcn201_hubbub.o\
+	dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_dpp.o \
 	dcn201_dccg.o dcn201_link_encoder.o
 
 AMD_DAL_DCN201 = $(addprefix $(AMDDALPATH)/dc/dcn201/,$(DCN201))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
index ce1be0afae4a..ca92f5c8e7fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
@@ -2,7 +2,7 @@
 #
 # Makefile for DCN21.
 
-DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o \
+DCN21 = dcn21_hubp.o dcn21_hubbub.o \
 	 dcn21_link_encoder.o dcn21_dccg.o
 
 AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
index 68cad55c72ab..e13d69a22c1c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
@@ -691,7 +691,7 @@ static void dmcub_PLAT_54186_wa(struct hubp *hubp,
 	cmd.PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid;
 
 	PERF_TRACE();  // TODO: remove after performance is stable.
-	dm_execute_dmub_cmd(hubp->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(hubp->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 	PERF_TRACE();  // TODO: remove after performance is stable.
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
index af4d2065d2c1..b5b2aa3b3783 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
@@ -23,12 +23,9 @@
 #
 #
 
-DCN30 := \
-	dcn30_init.o \
-	dcn30_hubbub.o \
+DCN30 := dcn30_hubbub.o \
 	dcn30_hubp.o \
 	dcn30_dpp.o \
-	dcn30_optc.o \
 	dcn30_dccg.o \
 	dcn30_mpc.o dcn30_vpg.o \
 	dcn30_afmt.o \
@@ -38,7 +35,6 @@ DCN30 := \
 	dcn30_dwb_cm.o \
 	dcn30_cm_common.o \
 	dcn30_mmhubbub.o \
-	dcn30_resource.o \
 	dcn30_dio_link_encoder.o
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c
index 0d98918bf0fc..1b9d9495f76d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c
@@ -130,6 +130,28 @@ bool dwb3_disable(struct dwbc *dwbc)
 	return true;
 }
 
+void dwb3_set_fc_enable(struct dwbc *dwbc, enum dwb_frame_capture_enable enable)
+{
+	struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc);
+	unsigned int pre_locked;
+
+	REG_GET(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, &pre_locked);
+
+	/* Lock DWB registers */
+	if (pre_locked == 0)
+		REG_UPDATE(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, 1);
+
+	/* Disable FC */
+	REG_UPDATE(FC_MODE_CTRL, FC_FRAME_CAPTURE_EN, enable);
+
+	/* Unlock DWB registers */
+	if (pre_locked == 0)
+		REG_UPDATE(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, 0);
+
+	DC_LOG_DWB("%s dwb3_fc_disabled at inst = %d", __func__, dwbc->inst);
+}
+
+
 bool dwb3_update(struct dwbc *dwbc, struct dc_dwb_params *params)
 {
 	struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc);
@@ -226,6 +248,7 @@ static const struct dwbc_funcs dcn30_dwbc_funcs = {
 	.disable		= dwb3_disable,
 	.update			= dwb3_update,
 	.is_enabled		= dwb3_is_enabled,
+	.set_fc_enable		= dwb3_set_fc_enable,
 	.set_stereo		= dwb3_set_stereo,
 	.set_new_content	= dwb3_set_new_content,
 	.dwb_program_output_csc	= NULL,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h
index a5d1b81e768d..332634b76aac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h
@@ -877,6 +877,8 @@ bool dwb3_update(struct dwbc *dwbc, struct dc_dwb_params *params);
 
 bool dwb3_is_enabled(struct dwbc *dwbc);
 
+void dwb3_set_fc_enable(struct dwbc *dwbc, enum dwb_frame_capture_enable enable);
+
 void dwb3_set_stereo(struct dwbc *dwbc,
 	struct dwb_stereo_params *stereo_params);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
index 701c7d8bc038..03a50c32fcfe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
@@ -243,6 +243,9 @@ static bool dwb3_program_ogam_lut(
 		return false;
 	}
 
+	if (params->hw_points_num == 0)
+		return false;
+
 	REG_SET(DWB_OGAM_CONTROL, 0, DWB_OGAM_MODE, 2);
 
 	current_mode = dwb3_get_ogam_current(dwbc30);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
index 30fbc5e06dca..d241f665e40a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
@@ -10,9 +10,8 @@
 #
 # Makefile for dcn30.
 
-DCN301 = dcn301_init.o dcn301_resource.o dcn301_dccg.o \
-		dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o \
-		dcn301_optc.o
+DCN301 = dcn301_dccg.o \
+		dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o
 
 AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
deleted file mode 100644
index 95b66baf39e9..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-#
-# (c) Copyright 2020 Advanced Micro Devices, Inc. All the rights reserved
-#
-#  Authors: AMD
-#
-# Makefile for dcn302.
-
-DCN3_02 = dcn302_init.o dcn302_resource.o
-
-AMD_DAL_DCN3_02 = $(addprefix $(AMDDALPATH)/dc/dcn302/,$(DCN3_02))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_02)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile
index d7b3ad780e5d..a954e316aca2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile
@@ -6,7 +6,7 @@
 #
 # Makefile for dcn303.
 
-DCN3_03 = dcn303_init.o dcn303_resource.o
+DCN3_03 = dcn303_init.o
 
 AMD_DAL_DCN3_03 = $(addprefix $(AMDDALPATH)/dc/dcn303/,$(DCN3_03))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
index 96e45c9efb46..5d93ac16c03a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
@@ -10,8 +10,8 @@
 #
 # Makefile for dcn31.
 
-DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_init.o dcn31_hubp.o \
-	dcn31_dccg.o dcn31_optc.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \
+DCN31 = dcn31_hubbub.o dcn31_hubp.o \
+	dcn31_dccg.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \
 	dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \
 	dcn31_afmt.o dcn31_vpg.o
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
index 4596f3bac1b4..26be5fee7411 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
@@ -125,7 +125,7 @@ static bool query_dp_alt_from_dmub(struct link_encoder *enc,
 	cmd->query_dp_alt.header.payload_bytes = sizeof(cmd->query_dp_alt.data);
 	cmd->query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter);
 
-	if (!dm_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+	if (!dc_wake_and_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
 		return false;
 
 	return true;
@@ -436,7 +436,7 @@ static bool link_dpia_control(struct dc_context *dc_ctx,
 
 	cmd.dig1_dpia_control.dpia_control = *dpia_control;
 
-	dm_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
index 217acd4e292a..03248422d6ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
@@ -50,9 +50,9 @@ static bool dcn31_query_backlight_info(struct panel_cntl *panel_cntl, union dmub
 	cmd->panel_cntl.header.type = DMUB_CMD__PANEL_CNTL;
 	cmd->panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_QUERY_BACKLIGHT_INFO;
 	cmd->panel_cntl.header.payload_bytes = sizeof(cmd->panel_cntl.data);
-	cmd->panel_cntl.data.inst = dcn31_panel_cntl->base.inst;
+	cmd->panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst;
 
-	return dm_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
+	return dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
 }
 
 static uint32_t dcn31_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_cntl)
@@ -78,14 +78,14 @@ static uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
 	cmd.panel_cntl.header.type = DMUB_CMD__PANEL_CNTL;
 	cmd.panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_HW_INIT;
 	cmd.panel_cntl.header.payload_bytes = sizeof(cmd.panel_cntl.data);
-	cmd.panel_cntl.data.inst = dcn31_panel_cntl->base.inst;
+	cmd.panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst;
 	cmd.panel_cntl.data.bl_pwm_cntl = panel_cntl->stored_backlight_registers.BL_PWM_CNTL;
 	cmd.panel_cntl.data.bl_pwm_period_cntl = panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL;
 	cmd.panel_cntl.data.bl_pwm_ref_div1 =
 		panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV;
 	cmd.panel_cntl.data.bl_pwm_ref_div2 =
 		panel_cntl->stored_backlight_registers.PANEL_PWRSEQ_REF_DIV2;
-	if (!dm_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+	if (!dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
 		return 0;
 
 	panel_cntl->stored_backlight_registers.BL_PWM_CNTL = cmd.panel_cntl.data.bl_pwm_cntl;
@@ -157,4 +157,5 @@ void dcn31_panel_cntl_construct(
 	dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs;
 	dcn31_panel_cntl->base.ctx = init_data->ctx;
 	dcn31_panel_cntl->base.inst = init_data->inst;
+	dcn31_panel_cntl->base.pwrseq_inst = init_data->pwrseq_inst;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
index 72456debb99f..b134ab05aa71 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
@@ -10,8 +10,7 @@
 #
 # Makefile for dcn314.
 
-DCN314 = dcn314_resource.o dcn314_init.o \
-		dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o
+DCN314 = dcn314_dio_stream_encoder.o dcn314_dccg.o
 
 AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
deleted file mode 100644
index 59381d24800b..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# Copyright © 2021 Advanced Micro Devices, Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-# Authors: AMD
-#
-# Makefile for dcn315.
-
-DCN315 = dcn315_resource.o
-
-AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN315)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
deleted file mode 100644
index 819d44a9439b..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# Copyright 2021 Advanced Micro Devices, Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-# Authors: AMD
-#
-# Makefile for dcn316.
-
-DCN316 = dcn316_resource.o
-
-AMD_DAL_DCN316 = $(addprefix $(AMDDALPATH)/dc/dcn316/,$(DCN316))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN316)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
index 8bb251307247..5314770fff1c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
@@ -10,10 +10,10 @@
 #
 # Makefile for dcn32.
 
-DCN32 = dcn32_resource.o dcn32_hubbub.o dcn32_init.o dcn32_dccg.o \
-		dcn32_dccg.o dcn32_optc.o dcn32_mmhubbub.o dcn32_hubp.o dcn32_dpp.o \
-		dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_hpo_dp_link_encoder.o \
-		dcn32_resource_helpers.o dcn32_mpc.o
+DCN32 = dcn32_hubbub.o dcn32_dccg.o \
+		dcn32_mmhubbub.o dcn32_dpp.o dcn32_hubp.o dcn32_mpc.o \
+		dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_resource_helpers.o \
+		dcn32_hpo_dp_link_encoder.o
 
 AMD_DAL_DCN32 = $(addprefix $(AMDDALPATH)/dc/dcn32/,$(DCN32))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
index 994b21ed272f..e789e654c387 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
@@ -71,12 +71,13 @@ void mpc32_power_on_blnd_lut(
 {
 	struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
 
+	REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0, MPCC_MCM_1DLUT_MEM_PWR_DIS, power_on);
+
 	if (mpc->ctx->dc->debug.enable_mem_low_power.bits.cm) {
 		if (power_on) {
 			REG_UPDATE(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_FORCE, 0);
 			REG_WAIT(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_STATE, 0, 1, 5);
 		} else if (!mpc->ctx->dc->debug.disable_mem_low_power) {
-			ASSERT(false);
 			/* TODO: change to mpc
 			 *  dpp_base->ctx->dc->optimized_required = true;
 			 *  dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index bc5f0db23d0c..e4a328b45c8a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -24,10 +24,11 @@
  */
 
 // header file of functions being implemented
-#include "dcn32_resource.h"
+#include "dcn32/dcn32_resource.h"
 #include "dcn20/dcn20_resource.h"
 #include "dml/dcn32/display_mode_vba_util_32.h"
 #include "dml/dcn32/dcn32_fpu.h"
+#include "dc_state_priv.h"
 
 static bool is_dual_plane(enum surface_pixel_format format)
 {
@@ -182,20 +183,6 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
 	return true;
 }
 
-bool dcn32_subvp_in_use(struct dc *dc,
-		struct dc_state *context)
-{
-	uint32_t i;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE)
-			return true;
-	}
-	return false;
-}
-
 bool dcn32_mpo_in_use(struct dc_state *context)
 {
 	uint32_t i;
@@ -264,18 +251,17 @@ static void override_det_for_subvp(struct dc *dc, struct dc_state *context, uint
 
 	// Do not override if a stream has multiple planes
 	for (i = 0; i < context->stream_count; i++) {
-		if (context->stream_status[i].plane_count > 1) {
+		if (context->stream_status[i].plane_count > 1)
 			return;
-		}
-		if (context->streams[i]->mall_stream_config.type != SUBVP_PHANTOM) {
+
+		if (dc_state_get_stream_subvp_type(context, context->streams[i]) != SUBVP_PHANTOM)
 			stream_count++;
-		}
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe_ctx->stream && pipe_ctx->plane_state && pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+		if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
 			if (dcn32_allow_subvp_high_refresh_rate(dc, context, pipe_ctx)) {
 
 				if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) {
@@ -290,7 +276,7 @@ static void override_det_for_subvp(struct dc *dc, struct dc_state *context, uint
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-			if (pipe_ctx->stream && pipe_ctx->plane_state && pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+			if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
 				if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) {
 					if (pipe_segments[i] > 4)
 						pipe_segments[i] = 4;
@@ -337,14 +323,14 @@ void dcn32_determine_det_override(struct dc *dc,
 
 	for (i = 0; i < context->stream_count; i++) {
 		/* Don't count SubVP streams for DET allocation */
-		if (context->streams[i]->mall_stream_config.type != SUBVP_PHANTOM)
+		if (dc_state_get_stream_subvp_type(context, context->streams[i]) != SUBVP_PHANTOM)
 			stream_count++;
 	}
 
 	if (stream_count > 0) {
 		stream_segments = 18 / stream_count;
 		for (i = 0; i < context->stream_count; i++) {
-			if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM)
+			if (dc_state_get_stream_subvp_type(context, context->streams[i]) == SUBVP_PHANTOM)
 				continue;
 
 			if (context->stream_status[i].plane_count > 0)
@@ -430,71 +416,6 @@ void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context,
 		dcn32_determine_det_override(dc, context, pipes);
 }
 
-/**
- * dcn32_save_mall_state(): Save MALL (SubVP) state for fast validation cases
- *
- * This function saves the MALL (SubVP) case for fast validation cases. For fast validation,
- * there are situations where a shallow copy of the dc->current_state is created for the
- * validation. In this case we want to save and restore the mall config because we always
- * teardown subvp at the beginning of validation (and don't attempt to add it back if it's
- * fast validation). If we don't restore the subvp config in cases of fast validation +
- * shallow copy of the dc->current_state, the dc->current_state will have a partially
- * removed subvp state when we did not intend to remove it.
- *
- * NOTE: This function ONLY works if the streams are not moved to a different pipe in the
- *       validation. We don't expect this to happen in fast_validation=1 cases.
- *
- * @dc: Current DC state
- * @context: New DC state to be programmed
- * @temp_config: struct used to cache the existing MALL state
- *
- * Return: void
- */
-void dcn32_save_mall_state(struct dc *dc,
-		struct dc_state *context,
-		struct mall_temp_config *temp_config)
-{
-	uint32_t i;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (pipe->stream)
-			temp_config->mall_stream_config[i] = pipe->stream->mall_stream_config;
-
-		if (pipe->plane_state)
-			temp_config->is_phantom_plane[i] = pipe->plane_state->is_phantom;
-	}
-}
-
-/**
- * dcn32_restore_mall_state(): Restore MALL (SubVP) state for fast validation cases
- *
- * Restore the MALL state based on the previously saved state from dcn32_save_mall_state
- *
- * @dc: Current DC state
- * @context: New DC state to be programmed, restore MALL state into here
- * @temp_config: struct that has the cached MALL state
- *
- * Return: void
- */
-void dcn32_restore_mall_state(struct dc *dc,
-		struct dc_state *context,
-		struct mall_temp_config *temp_config)
-{
-	uint32_t i;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (pipe->stream)
-			pipe->stream->mall_stream_config = temp_config->mall_stream_config[i];
-
-		if (pipe->plane_state)
-			pipe->plane_state->is_phantom = temp_config->is_phantom_plane[i];
-	}
-}
-
 #define MAX_STRETCHED_V_BLANK 1000 // in micro-seconds (must ensure to match value in FW)
 /*
  * Scaling factor for v_blank stretch calculations considering timing in
@@ -589,13 +510,14 @@ static int get_refresh_rate(struct dc_stream_state *fpo_candidate_stream)
  *
  * Return: Pointer to FPO stream candidate if config can support FPO, otherwise NULL
  */
-struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context)
+struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context)
 {
 	int refresh_rate = 0;
 	const int minimum_refreshrate_supported = 120;
 	struct dc_stream_state *fpo_candidate_stream = NULL;
 	bool is_fpo_vactive = false;
 	uint32_t fpo_vactive_margin_us = 0;
+	struct dc_stream_status *fpo_stream_status = NULL;
 
 	if (context == NULL)
 		return NULL;
@@ -618,16 +540,28 @@ struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stre
 		DC_FP_START();
 		dcn32_assign_fpo_vactive_candidate(dc, context, &fpo_candidate_stream);
 		DC_FP_END();
-
+		if (fpo_candidate_stream)
+			fpo_stream_status = dc_state_get_stream_status(context, fpo_candidate_stream);
 		DC_FP_START();
 		is_fpo_vactive = dcn32_find_vactive_pipe(dc, context, dc->debug.fpo_vactive_min_active_margin_us);
 		DC_FP_END();
 		if (!is_fpo_vactive || dc->debug.disable_fpo_vactive)
 			return NULL;
-	} else
+	} else {
 		fpo_candidate_stream = context->streams[0];
+		if (fpo_candidate_stream)
+			fpo_stream_status = dc_state_get_stream_status(context, fpo_candidate_stream);
+	}
 
-	if (!fpo_candidate_stream)
+	/* In DCN32/321, FPO uses per-pipe P-State force.
+	 * If there's no planes, HUBP is power gated and
+	 * therefore programming UCLK_PSTATE_FORCE does
+	 * nothing (P-State will always be asserted naturally
+	 * on a pipe that has HUBP power gated. Therefore we
+	 * only want to enable FPO if the FPO pipe has both
+	 * a stream and a plane.
+	 */
+	if (!fpo_candidate_stream || !fpo_stream_status || fpo_stream_status->plane_count == 0)
 		return NULL;
 
 	if (fpo_candidate_stream->sink->edid_caps.panel_patch.disable_fams)
@@ -666,6 +600,30 @@ bool dcn32_check_native_scaling_for_res(struct pipe_ctx *pipe, unsigned int widt
 }
 
 /**
+ * disallow_subvp_in_active_plus_blank() - Function to determine disallowed subvp + drr/vblank configs
+ *
+ * @pipe: subvp pipe to be used for the subvp + drr/vblank config
+ *
+ * Since subvp is being enabled on more configs (such as 1080p60), we want
+ * to explicitly block any configs that we don't want to enable. We do not
+ * want to enable any 1080p60 (SubVP) + drr / vblank configs since these
+ * are already convered by FPO.
+ *
+ * Return: True if disallowed, false otherwise
+ */
+static bool disallow_subvp_in_active_plus_blank(struct pipe_ctx *pipe)
+{
+	bool disallow = false;
+
+	if (resource_is_pipe_type(pipe, OPP_HEAD) &&
+			resource_is_pipe_type(pipe, DPP_PIPE)) {
+		if (pipe->stream->timing.v_addressable == 1080 && pipe->stream->timing.h_addressable == 1920)
+			disallow = true;
+	}
+	return disallow;
+}
+
+/**
  * dcn32_subvp_drr_admissable() - Determine if SubVP + DRR config is admissible
  *
  * @dc: Current DC state
@@ -688,21 +646,24 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context)
 	bool drr_pipe_found = false;
 	bool drr_psr_capable = false;
 	uint64_t refresh_rate = 0;
+	bool subvp_disallow = false;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 		if (resource_is_pipe_type(pipe, OPP_HEAD) &&
 				resource_is_pipe_type(pipe, DPP_PIPE)) {
-			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+			if (pipe_mall_type == SUBVP_MAIN) {
 				subvp_count++;
 
+				subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe);
 				refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
 					pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
 			}
-			if (pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			if (pipe_mall_type == SUBVP_NONE) {
 				non_subvp_pipes++;
 				drr_psr_capable = (drr_psr_capable || dcn32_is_psr_capable(pipe));
 				if (pipe->stream->ignore_msa_timing_param &&
@@ -713,7 +674,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context)
 		}
 	}
 
-	if (subvp_count == 1 && non_subvp_pipes == 1 && drr_pipe_found && !drr_psr_capable &&
+	if (subvp_count == 1 && !subvp_disallow && non_subvp_pipes == 1 && drr_pipe_found && !drr_psr_capable &&
 		((uint32_t)refresh_rate < 120))
 		result = true;
 
@@ -746,21 +707,24 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
 	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
 	bool vblank_psr_capable = false;
 	uint64_t refresh_rate = 0;
+	bool subvp_disallow = false;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 		if (resource_is_pipe_type(pipe, OPP_HEAD) &&
 				resource_is_pipe_type(pipe, DPP_PIPE)) {
-			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+			if (pipe_mall_type == SUBVP_MAIN) {
 				subvp_count++;
 
+				subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe);
 				refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
 					pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
 				refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
 			}
-			if (pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			if (pipe_mall_type == SUBVP_NONE) {
 				non_subvp_pipes++;
 				vblank_psr_capable = (vblank_psr_capable || dcn32_is_psr_capable(pipe));
 				if (pipe->stream->ignore_msa_timing_param &&
@@ -772,9 +736,35 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
 	}
 
 	if (subvp_count == 1 && non_subvp_pipes == 1 && !drr_pipe_found && !vblank_psr_capable &&
-		((uint32_t)refresh_rate < 120) &&
+		((uint32_t)refresh_rate < 120) && !subvp_disallow &&
 		vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp)
 		result = true;
 
 	return result;
 }
+
+void dcn32_update_dml_pipes_odm_policy_based_on_context(struct dc *dc, struct dc_state *context,
+		display_e2e_pipe_params_st *pipes)
+{
+	int i, pipe_cnt;
+	struct resource_context *res_ctx = &context->res_ctx;
+	struct pipe_ctx *pipe = NULL;
+
+	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+		int odm_slice_count = 0;
+
+		if (!res_ctx->pipe_ctx[i].stream)
+			continue;
+		pipe = &res_ctx->pipe_ctx[i];
+		odm_slice_count = resource_get_odm_slice_count(pipe);
+
+		if (odm_slice_count == 1)
+			pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+		else if (odm_slice_count == 2)
+			pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+		else if (odm_slice_count == 4)
+			pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_4to1;
+
+		pipe_cnt++;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
index 0a199c83bb5b..c195c47f58b4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
@@ -10,7 +10,7 @@
 #
 # Makefile for dcn321.
 
-DCN321 = dcn321_resource.o dcn321_dio_link_encoder.o
+DCN321 = dcn321_dio_link_encoder.o
 
 AMD_DAL_DCN321 = $(addprefix $(AMDDALPATH)/dc/dcn321/,$(DCN321))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
index 20d0eef1a13b..0e317e0c36a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile
@@ -10,9 +10,9 @@
 #
 # Makefile for DCN35.
 
-DCN35 = dcn35_resource.o dcn35_init.o dcn35_dio_stream_encoder.o \
-	dcn35_dio_link_encoder.o dcn35_dccg.o dcn35_optc.o \
-	dcn35_dsc.o dcn35_hubp.o dcn35_hubbub.o \
+DCN35 = dcn35_dio_stream_encoder.o \
+	dcn35_dio_link_encoder.o dcn35_dccg.o \
+	dcn35_hubp.o dcn35_hubbub.o \
 	dcn35_mmhubbub.o dcn35_opp.o dcn35_dpp.o dcn35_pg_cntl.o dcn35_dwb.o
 
 AMD_DAL_DCN35 = $(addprefix $(AMDDALPATH)/dc/dcn35/,$(DCN35))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
index 479f3683c0b7..f1ba7bb792ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c
@@ -256,6 +256,21 @@ static void dccg35_set_dtbclk_dto(
 	if (params->ref_dtbclk_khz && req_dtbclk_khz) {
 		uint32_t modulo, phase;
 
+		switch (params->otg_inst) {
+		case 0:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 1);
+			break;
+		case 1:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 1);
+			break;
+		case 2:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 1);
+			break;
+		case 3:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 1);
+			break;
+		}
+
 		// phase / modulo = dtbclk / dtbclk ref
 		modulo = params->ref_dtbclk_khz * 1000;
 		phase = req_dtbclk_khz * 1000;
@@ -280,6 +295,21 @@ static void dccg35_set_dtbclk_dto(
 		REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
 				PIPE_DTO_SRC_SEL[params->otg_inst], 2);
 	} else {
+		switch (params->otg_inst) {
+		case 0:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 0);
+			break;
+		case 1:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 0);
+			break;
+		case 2:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 0);
+			break;
+		case 3:
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 0);
+			break;
+		}
+
 		REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
 				DTBCLK_DTO_ENABLE[params->otg_inst], 0,
 				PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1);
@@ -476,6 +506,64 @@ static void dccg35_dpp_root_clock_control(
 	dccg->dpp_clock_gated[dpp_inst] = !clock_on;
 }
 
+static void dccg35_disable_symclk32_se(
+		struct dccg *dccg,
+		int hpo_se_inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	/* set refclk as the source for symclk32_se */
+	switch (hpo_se_inst) {
+	case 0:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE0_SRC_SEL, 0,
+				SYMCLK32_SE0_EN, 0);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+					SYMCLK32_SE0_GATE_DISABLE, 0);
+//			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+//					SYMCLK32_ROOT_SE0_GATE_DISABLE, 0);
+		}
+		break;
+	case 1:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE1_SRC_SEL, 0,
+				SYMCLK32_SE1_EN, 0);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+					SYMCLK32_SE1_GATE_DISABLE, 0);
+//			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+//					SYMCLK32_ROOT_SE1_GATE_DISABLE, 0);
+		}
+		break;
+	case 2:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE2_SRC_SEL, 0,
+				SYMCLK32_SE2_EN, 0);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+					SYMCLK32_SE2_GATE_DISABLE, 0);
+//			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+//					SYMCLK32_ROOT_SE2_GATE_DISABLE, 0);
+		}
+		break;
+	case 3:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+				SYMCLK32_SE3_SRC_SEL, 0,
+				SYMCLK32_SE3_EN, 0);
+		if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+					SYMCLK32_SE3_GATE_DISABLE, 0);
+//			REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+//					SYMCLK32_ROOT_SE3_GATE_DISABLE, 0);
+		}
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
 void dccg35_init(struct dccg *dccg)
 {
 	int otg_inst;
@@ -484,7 +572,7 @@ void dccg35_init(struct dccg *dccg)
 	 * will cause DCN to hang.
 	 */
 	for (otg_inst = 0; otg_inst < 4; otg_inst++)
-		dccg31_disable_symclk32_se(dccg, otg_inst);
+		dccg35_disable_symclk32_se(dccg, otg_inst);
 
 	if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
 		for (otg_inst = 0; otg_inst < 2; otg_inst++)
@@ -758,7 +846,7 @@ static const struct dccg_funcs dccg35_funcs = {
 	.dccg_init = dccg35_init,
 	.set_dpstreamclk = dccg35_set_dpstreamclk,
 	.enable_symclk32_se = dccg31_enable_symclk32_se,
-	.disable_symclk32_se = dccg31_disable_symclk32_se,
+	.disable_symclk32_se = dccg35_disable_symclk32_se,
 	.enable_symclk32_le = dccg31_enable_symclk32_le,
 	.disable_symclk32_le = dccg31_disable_symclk32_le,
 	.set_symclk32_le_root_clock_gating = dccg31_set_symclk32_le_root_clock_gating,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h
index 423feb4c2f3f..1586a45ca3bd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h
@@ -34,6 +34,8 @@
 #define DCCG_REG_LIST_DCN35() \
 	DCCG_REG_LIST_DCN314(),\
 	SR(DPPCLK_CTRL),\
+	SR(DCCG_GATE_DISABLE_CNTL4),\
+	SR(DCCG_GATE_DISABLE_CNTL5),\
 	SR(DCCG_GATE_DISABLE_CNTL6),\
 	SR(DCCG_GLOBAL_FGCG_REP_CNTL),\
 	SR(SYMCLKA_CLOCK_ENABLE),\
@@ -174,7 +176,61 @@
 	DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, mask_sh),\
 	DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, mask_sh),\
 	DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, mask_sh),\
-	DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, mask_sh)
+	DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, HDMICHARCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL6, HDMISTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYA_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYB_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYC_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYD_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYE_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_PHASE, mask_sh),\
+	DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_MODULO, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL, DISPCLK_DCCG_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL3, HDMISTREAMCLK0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\
+	DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\
 
 struct dccg *dccg35_create(
 		struct dc_context *ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
index f91e08895275..da94e5309fba 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
@@ -256,6 +256,10 @@ void dcn35_link_encoder_construct(
 		enc10->base.features.flags.bits.IS_UHBR10_CAPABLE = bp_cap_info.DP_UHBR10_EN;
 		enc10->base.features.flags.bits.IS_UHBR13_5_CAPABLE = bp_cap_info.DP_UHBR13_5_EN;
 		enc10->base.features.flags.bits.IS_UHBR20_CAPABLE = bp_cap_info.DP_UHBR20_EN;
+		if (bp_cap_info.DP_IS_USB_C) {
+			/*BIOS not switch to use CONNECTOR_ID_USBC = 24 yet*/
+			enc10->base.features.flags.bits.DP_IS_USB_C = 1;
+		}
 
 	} else {
 		DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
@@ -264,4 +268,5 @@ void dcn35_link_encoder_construct(
 	}
 	if (enc10->base.ctx->dc->debug.hdmi20_disable)
 		enc10->base.features.flags.bits.HDMI_6GB_EN = 0;
+
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c
index 46f71ff08fd1..53bd0ae4bab5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c
@@ -261,6 +261,7 @@ void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 	uint32_t power_gate = power_on ? 0 : 1;
 	uint32_t pwr_status = power_on ? 0 : 2;
 	uint32_t org_ip_request_cntl;
+	uint32_t power_forceon;
 	bool block_enabled;
 
 	if (pg_cntl->ctx->dc->debug.ignore_pg ||
@@ -277,6 +278,10 @@ void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 			return;
 	}
 
+	REG_GET(DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon);
+	if (power_forceon)
+		return;
+
 	REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
 	if (org_ip_request_cntl == 0)
 		REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
@@ -304,6 +309,7 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 	uint32_t power_gate = power_on ? 0 : 1;
 	uint32_t pwr_status = power_on ? 0 : 2;
 	uint32_t org_ip_request_cntl;
+	uint32_t power_forceon;
 	bool block_enabled;
 
 	if (pg_cntl->ctx->dc->debug.ignore_pg ||
@@ -319,6 +325,10 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 			return;
 	}
 
+	REG_GET(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon);
+	if (power_forceon)
+		return;
+
 	REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
 	if (org_ip_request_cntl == 0)
 		REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
@@ -332,13 +342,6 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on)
 	pg_cntl->pg_res_enable[PG_DCIO] = power_on;
 }
 
-void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on)
-{
-	struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
-
-	REG_UPDATE(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, power_on ? 1 : 0);
-}
-
 static bool pg_cntl35_plane_otg_status(struct pg_cntl *pg_cntl)
 {
 	struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
@@ -508,8 +511,7 @@ static const struct pg_cntl_funcs pg_cntl35_funcs = {
 	.mpcc_pg_control = pg_cntl35_mpcc_pg_control,
 	.opp_pg_control = pg_cntl35_opp_pg_control,
 	.optc_pg_control = pg_cntl35_optc_pg_control,
-	.dwb_pg_control = pg_cntl35_dwb_pg_control,
-	.set_force_poweron_domain22 = pg_cntl35_set_force_poweron_domain22
+	.dwb_pg_control = pg_cntl35_dwb_pg_control
 };
 
 struct pg_cntl *pg_cntl35_create(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h
index 069dae08e222..3de240884d22 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h
@@ -183,7 +183,6 @@ void pg_cntl35_optc_pg_control(struct pg_cntl *pg_cntl,
 	unsigned int optc_inst, bool power_on);
 void pg_cntl35_dwb_pg_control(struct pg_cntl *pg_cntl, bool power_on);
 void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl);
-void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on);
 
 struct pg_cntl *pg_cntl35_create(
 	struct dc_context *ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
index 7ce9a5b6c33b..6d7a15dcf8a7 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
@@ -103,10 +103,16 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger(
 /*
  * Sends ALLOCATE_PAYLOAD message.
  */
-bool dm_helpers_dp_mst_send_payload_allocation(
+void dm_helpers_dp_mst_send_payload_allocation(
 		struct dc_context *ctx,
-		const struct dc_stream_state *stream,
-		bool enable);
+		const struct dc_stream_state *stream);
+
+/*
+ * Update mst manager relevant variables
+ */
+void dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+		struct dc_context *ctx,
+		const struct dc_stream_state *stream);
 
 bool dm_helpers_dp_mst_start_top_mgr(
 		struct dc_context *ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index 4440d08743aa..bd7ba0a25198 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
@@ -247,6 +247,7 @@ struct pp_smu_funcs_nv {
 #define PP_SMU_NUM_MEMCLK_DPM_LEVELS  4
 #define PP_SMU_NUM_DCLK_DPM_LEVELS    8
 #define PP_SMU_NUM_VCLK_DPM_LEVELS    8
+#define PP_SMU_NUM_VPECLK_DPM_LEVELS  8
 
 struct dpm_clock {
   uint32_t  Freq;    // In MHz
@@ -262,6 +263,7 @@ struct dpm_clocks {
 	struct dpm_clock MemClocks[PP_SMU_NUM_MEMCLK_DPM_LEVELS];
 	struct dpm_clock VClocks[PP_SMU_NUM_VCLK_DPM_LEVELS];
 	struct dpm_clock DClocks[PP_SMU_NUM_DCLK_DPM_LEVELS];
+	struct dpm_clock VPEClocks[PP_SMU_NUM_VPECLK_DPM_LEVELS];
 };
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index ea7d60f9a9b4..6042a5a6a44f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -61,8 +61,12 @@ endif
 endif
 
 ifneq ($(CONFIG_FRAME_WARN),0)
+ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
+frame_warn_flag := -Wframe-larger-than=3072
+else
 frame_warn_flag := -Wframe-larger-than=2048
 endif
+endif
 
 CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
index 50b0434354f8..0c4a8fe8e5ca 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
@@ -30,7 +30,7 @@
 #include "dcn_calc_auto.h"
 #include "dal_asic_id.h"
 #include "resource.h"
-#include "dcn10/dcn10_resource.h"
+#include "resource/dcn10/dcn10_resource.h"
 #include "dcn10/dcn10_hubbub.h"
 #include "dml/dml1_display_rq_dlg_calc.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
index 2cbdd75429ff..6e669a2c5b2d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
@@ -36,7 +36,7 @@
  * Define the maximum amount of states supported by the ASIC. Every ASIC has a
  * specific number of states; this macro defines the maximum number of states.
  */
-#define DC__VOLTAGE_STATES 20
+#define DC__VOLTAGE_STATES 40
 #define DC__NUM_DPP__4 1
 #define DC__NUM_DPP__0_PRESENT 1
 #define DC__NUM_DPP__1_PRESENT 1
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index 7fc8b18096ba..38ab9ad60ef8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -33,6 +33,7 @@
 
 #include "link.h"
 #include "dcn20_fpu.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER \
 	dc->ctx->logger
@@ -440,7 +441,115 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = {
 	.use_urgent_burst_bw = 0
 };
 
-struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { 0 };
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = {
+	.clock_limits = {
+		{
+			.state = 0,
+			.dcfclk_mhz = 560.0,
+			.fabricclk_mhz = 560.0,
+			.dispclk_mhz = 513.0,
+			.dppclk_mhz = 513.0,
+			.phyclk_mhz = 540.0,
+			.socclk_mhz = 560.0,
+			.dscclk_mhz = 171.0,
+			.dram_speed_mts = 1069.0,
+		},
+		{
+			.state = 1,
+			.dcfclk_mhz = 694.0,
+			.fabricclk_mhz = 694.0,
+			.dispclk_mhz = 642.0,
+			.dppclk_mhz = 642.0,
+			.phyclk_mhz = 600.0,
+			.socclk_mhz = 694.0,
+			.dscclk_mhz = 214.0,
+			.dram_speed_mts = 1324.0,
+		},
+		{
+			.state = 2,
+			.dcfclk_mhz = 875.0,
+			.fabricclk_mhz = 875.0,
+			.dispclk_mhz = 734.0,
+			.dppclk_mhz = 734.0,
+			.phyclk_mhz = 810.0,
+			.socclk_mhz = 875.0,
+			.dscclk_mhz = 245.0,
+			.dram_speed_mts = 1670.0,
+		},
+		{
+			.state = 3,
+			.dcfclk_mhz = 1000.0,
+			.fabricclk_mhz = 1000.0,
+			.dispclk_mhz = 1100.0,
+			.dppclk_mhz = 1100.0,
+			.phyclk_mhz = 810.0,
+			.socclk_mhz = 1000.0,
+			.dscclk_mhz = 367.0,
+			.dram_speed_mts = 2000.0,
+		},
+		{
+			.state = 4,
+			.dcfclk_mhz = 1200.0,
+			.fabricclk_mhz = 1200.0,
+			.dispclk_mhz = 1284.0,
+			.dppclk_mhz = 1284.0,
+			.phyclk_mhz = 810.0,
+			.socclk_mhz = 1200.0,
+			.dscclk_mhz = 428.0,
+			.dram_speed_mts = 2000.0,
+		},
+		{
+			.state = 5,
+			.dcfclk_mhz = 1200.0,
+			.fabricclk_mhz = 1200.0,
+			.dispclk_mhz = 1284.0,
+			.dppclk_mhz = 1284.0,
+			.phyclk_mhz = 810.0,
+			.socclk_mhz = 1200.0,
+			.dscclk_mhz = 428.0,
+			.dram_speed_mts = 2000.0,
+		},
+	},
+
+	.num_states = 5,
+	.sr_exit_time_us = 1.9,
+	.sr_enter_plus_exit_time_us = 4.4,
+	.urgent_latency_us = 3.0,
+	.urgent_latency_pixel_data_only_us = 4.0,
+	.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+	.urgent_latency_vm_data_only_us = 4.0,
+	.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+	.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+	.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+	.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
+	.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
+	.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+	.max_avg_sdp_bw_use_normal_percent = 40.0,
+	.max_avg_dram_bw_use_normal_percent = 40.0,
+	.writeback_latency_us = 12.0,
+	.ideal_dram_bw_after_urgent_percent = 40.0,
+	.max_request_size_bytes = 256,
+	.dram_channel_width_bytes = 16,
+	.fabric_datapath_to_dcn_data_return_bytes = 64,
+	.dcn_downspread_percent = 0.5,
+	.downspread_percent = 0.5,
+	.dram_page_open_time_ns = 50.0,
+	.dram_rw_turnaround_time_ns = 17.5,
+	.dram_return_buffer_per_channel_bytes = 8192,
+	.round_trip_ping_latency_dcfclk_cycles = 131,
+	.urgent_out_of_order_return_per_channel_bytes = 4096,
+	.channel_interleave_bytes = 256,
+	.num_banks = 8,
+	.num_chans = 16,
+	.vmm_page_size_bytes = 4096,
+	.dram_clock_change_latency_us = 45.0,
+	.writeback_dram_clock_change_latency_us = 23.0,
+	.return_bus_width_bytes = 64,
+	.dispclk_dppclk_vco_speed_mhz = 3850,
+	.xfc_bus_transport_time_us = 20,
+	.xfc_xbuf_latency_tolerance_us = 50,
+	.use_urgent_burst_bw = 0,
+};
 
 struct _vcs_dpi_ip_params_st dcn2_1_ip = {
 	.odm_capable = 1,
@@ -950,10 +1059,8 @@ static enum dcn_zstate_support_state  decide_zstate_support(struct dc *dc, struc
 {
 	int plane_count;
 	int i;
-	unsigned int min_dst_y_next_start_us;
 
 	plane_count = 0;
-	min_dst_y_next_start_us = 0;
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (context->res_ctx.pipe_ctx[i].plane_state)
 			plane_count++;
@@ -975,26 +1082,15 @@ static enum dcn_zstate_support_state  decide_zstate_support(struct dc *dc, struc
 	else if (context->stream_count == 1 &&  context->streams[0]->signal == SIGNAL_TYPE_EDP) {
 		struct dc_link *link = context->streams[0]->sink->link;
 		struct dc_stream_status *stream_status = &context->stream_status[0];
-		struct dc_stream_state *current_stream = context->streams[0];
 		int minmum_z8_residency = dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
 		bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency;
 		bool is_pwrseq0 = link->link_index == 0;
-		bool isFreesyncVideo;
-
-		isFreesyncVideo = current_stream->adjust.v_total_min == current_stream->adjust.v_total_max;
-		isFreesyncVideo = isFreesyncVideo && current_stream->timing.v_total < current_stream->adjust.v_total_min;
-		for (i = 0; i < dc->res_pool->pipe_count; i++) {
-			if (context->res_ctx.pipe_ctx[i].stream == current_stream && isFreesyncVideo) {
-				min_dst_y_next_start_us = context->res_ctx.pipe_ctx[i].dlg_regs.min_dst_y_next_start_us;
-				break;
-			}
-		}
 
 		/* Don't support multi-plane configurations */
 		if (stream_status->plane_count > 1)
 			return DCN_ZSTATE_SUPPORT_DISALLOW;
 
-		if (is_pwrseq0 && (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || min_dst_y_next_start_us > 5000))
+		if (is_pwrseq0 && context->bw_ctx.dml.vba.StutterPeriod > 5000.0)
 			return DCN_ZSTATE_SUPPORT_ALLOW;
 		else if (is_pwrseq0 && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr)
 			return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
@@ -1087,7 +1183,7 @@ void dcn20_calculate_dlg_params(struct dc *dc,
 		pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
 		pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
 
-		if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) {
 			// Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
 			context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
 			context->res_ctx.pipe_ctx[i].unbounded_req = false;
@@ -1437,7 +1533,7 @@ int dcn20_populate_dml_pipes_from_context(struct dc *dc,
 		 */
 		if (res_ctx->pipe_ctx[i].plane_state &&
 				(res_ctx->pipe_ctx[i].plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE ||
-				 res_ctx->pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM))
+				dc_state_get_pipe_subvp_type(context, &res_ctx->pipe_ctx[i]) == SUBVP_PHANTOM))
 			pipes[pipe_cnt].pipe.src.num_cursors = 0;
 		else
 			pipes[pipe_cnt].pipe.src.num_cursors = dc->dml.ip.number_of_cursors;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index 3686f1e7de3a..63c48c29ba49 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -3542,7 +3542,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 {
 	struct vba_vars_st *v = &mode_lib->vba;
 	int MinPrefetchMode, MaxPrefetchMode;
-	int i;
+	int i, start_state;
 	unsigned int j, k, m;
 	bool   EnoughWritebackUnits = true;
 	bool   WritebackModeSupport = true;
@@ -3553,6 +3553,11 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 	/*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
 
+	if (mode_lib->validate_max_state)
+		start_state = v->soc.num_states - 1;
+	else
+		start_state = 0;
+
 	CalculateMinAndMaxPrefetchMode(
 		mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
 		&MinPrefetchMode, &MaxPrefetchMode);
@@ -3851,7 +3856,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			v->SingleDPPViewportSizeSupportPerPlane,
 			&v->ViewportSizeSupport[0][0]);
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
 			v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
@@ -4007,7 +4012,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 	/*Total Available Pipes Support Check*/
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
 				v->TotalAvailablePipesSupport[i][j] = true;
@@ -4046,7 +4051,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		}
 	}
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
 			v->RequiresDSC[i][k] = false;
 			v->RequiresFEC[i][k] = false;
@@ -4174,7 +4179,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			}
 		}
 	}
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		v->DIOSupport[i] = true;
 		for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
 			if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
@@ -4185,7 +4190,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		}
 	}
 
-	for (i = 0; i < v->soc.num_states; ++i) {
+	for (i = start_state; i < v->soc.num_states; ++i) {
 		v->ODMCombine4To1SupportCheckOK[i] = true;
 		for (k = 0; k < v->NumberOfActivePlanes; ++k) {
 			if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
@@ -4197,7 +4202,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 	/* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		v->NotEnoughDSCUnits[i] = false;
 		v->TotalDSCUnitsRequired = 0.0;
 		for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
@@ -4217,7 +4222,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	}
 	/*DSC Delay per state*/
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
 			if (v->OutputBppPerState[i][k] == BPP_INVALID) {
 				v->BPP = 0.0;
@@ -4333,7 +4338,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
 	}
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
 				v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
@@ -5075,7 +5080,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
 	/*PTE Buffer Size Check*/
 
-	for (i = 0; i < v->soc.num_states; i++) {
+	for (i = start_state; i < v->soc.num_states; i++) {
 		for (j = 0; j < 2; j++) {
 			v->PTEBufferSizeNotExceeded[i][j] = true;
 			for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
@@ -5136,7 +5141,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	}
 	/*Mode Support, Voltage State and SOC Configuration*/
 
-	for (i = v->soc.num_states - 1; i >= 0; i--) {
+	for (i = v->soc.num_states - 1; i >= start_state; i--) {
 		for (j = 0; j < 2; j++) {
 			if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
 					&& v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
@@ -5158,7 +5163,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	}
 	{
 		unsigned int MaximumMPCCombine = 0;
-		for (i = v->soc.num_states; i >= 0; i--) {
+		for (i = v->soc.num_states; i >= start_state; i--) {
 			if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
 				v->VoltageLevel = i;
 				v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 9ec4172d1c2d..aa68d010cbfd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -32,6 +32,8 @@
 #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h"
 #include "dcn30/dcn30_resource.h"
 #include "link.h"
+#include "dc_state_priv.h"
+#include "resource.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -45,6 +47,14 @@ static const struct subvp_high_refresh_list subvp_high_refresh_list = {
 				{.width = 1920, .height = 1080, }},
 };
 
+static const struct subvp_active_margin_list subvp_active_margin_list = {
+			.min_refresh = 55,
+			.max_refresh = 65,
+			.res = {
+				{.width = 2560, .height = 1440, },
+				{.width = 1920, .height = 1080, }},
+};
+
 struct _vcs_dpi_ip_params_st dcn3_2_ip = {
 	.gpuvm_enable = 0,
 	.gpuvm_max_page_table_levels = 4,
@@ -282,7 +292,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
 
 		/* for subvp + DRR case, if subvp pipes are still present we support pstate */
 		if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported &&
-				dcn32_subvp_in_use(dc, context))
+				resource_subvp_in_use(dc, context))
 			vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support;
 
 		if (vlevel < context->bw_ctx.dml.vba.soc.num_states &&
@@ -333,7 +343,7 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
 		if (!pipe->stream)
 			continue;
 
-		if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (pipe->plane_state && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			pipes[pipe_idx].pipe.dest.vstartup_start =
 				get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
 			pipes[pipe_idx].pipe.dest.vupdate_offset =
@@ -616,7 +626,7 @@ static bool dcn32_assign_subvp_pipe(struct dc *dc,
 		if (pipe->plane_state && !pipe->top_pipe && !dcn32_is_center_timing(pipe) &&
 				!(pipe->stream->timing.pix_clk_100hz / 10000 > DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ) &&
 				(!dcn32_is_psr_capable(pipe) || (context->stream_count == 1 && dc->caps.dmub_caps.subvp_psr)) &&
-				pipe->stream->mall_stream_config.type == SUBVP_NONE &&
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE &&
 				(refresh_rate < 120 || dcn32_allow_subvp_high_refresh_rate(dc, context, pipe)) &&
 				!pipe->plane_state->address.tmz_surface &&
 				(vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0 ||
@@ -674,7 +684,7 @@ static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context
 
 		// Find the minimum pipe split count for non SubVP pipes
 		if (resource_is_pipe_type(pipe, OPP_HEAD) &&
-		    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE) {
 			split_cnt = 0;
 			while (pipe) {
 				split_cnt++;
@@ -727,8 +737,8 @@ static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context)
 		 * and also to store the two main SubVP pipe pointers in subvp_pipes[2].
 		 */
 		if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
-		    pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
-			phantom = pipe->stream->mall_stream_config.paired_stream;
+			dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+			phantom = dc_state_get_paired_subvp_stream(context, pipe->stream);
 			microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) +
 					phantom->timing.v_addressable;
 
@@ -796,6 +806,9 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
 	int16_t stretched_drr_us = 0;
 	int16_t drr_stretched_vblank_us = 0;
 	int16_t max_vblank_mallregion = 0;
+	struct dc_stream_state *phantom_stream;
+	bool subvp_found = false;
+	bool drr_found = false;
 
 	// Find SubVP pipe
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -808,8 +821,10 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
 			continue;
 
 		// Find the SubVP pipe
-		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+			subvp_found = true;
 			break;
+		}
 	}
 
 	// Find the DRR pipe
@@ -817,32 +832,37 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
 		drr_pipe = &context->res_ctx.pipe_ctx[i];
 
 		// We check for master pipe only
-		if (!resource_is_pipe_type(pipe, OTG_MASTER) ||
-				!resource_is_pipe_type(pipe, DPP_PIPE))
+		if (!resource_is_pipe_type(drr_pipe, OTG_MASTER) ||
+				!resource_is_pipe_type(drr_pipe, DPP_PIPE))
 			continue;
 
-		if (drr_pipe->stream->mall_stream_config.type == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param &&
-				(drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed))
+		if (dc_state_get_pipe_subvp_type(context, drr_pipe) == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param &&
+				(drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed)) {
+			drr_found = true;
 			break;
+		}
 	}
 
-	main_timing = &pipe->stream->timing;
-	phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing;
-	drr_timing = &drr_pipe->stream->timing;
-	prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
-			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
-			dc->caps.subvp_prefetch_end_to_mall_start_us;
-	subvp_active_us = main_timing->v_addressable * main_timing->h_total /
-			(double)(main_timing->pix_clk_100hz * 100) * 1000000;
-	drr_frame_us = drr_timing->v_total * drr_timing->h_total /
-			(double)(drr_timing->pix_clk_100hz * 100) * 1000000;
-	// P-State allow width and FW delays already included phantom_timing->v_addressable
-	mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
-			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
-	stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
-	drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
-			(double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
-	max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+	if (subvp_found && drr_found) {
+		phantom_stream = dc_state_get_paired_subvp_stream(context, pipe->stream);
+		main_timing = &pipe->stream->timing;
+		phantom_timing = &phantom_stream->timing;
+		drr_timing = &drr_pipe->stream->timing;
+		prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+				(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+				dc->caps.subvp_prefetch_end_to_mall_start_us;
+		subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+				(double)(main_timing->pix_clk_100hz * 100) * 1000000;
+		drr_frame_us = drr_timing->v_total * drr_timing->h_total /
+				(double)(drr_timing->pix_clk_100hz * 100) * 1000000;
+		// P-State allow width and FW delays already included phantom_timing->v_addressable
+		mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+				(double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+		stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
+		drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
+				(double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
+		max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+	}
 
 	/* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the
 	 * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis
@@ -887,6 +907,8 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
 	struct dc_crtc_timing *main_timing = NULL;
 	struct dc_crtc_timing *phantom_timing = NULL;
 	struct dc_crtc_timing *vblank_timing = NULL;
+	struct dc_stream_state *phantom_stream;
+	enum mall_stream_type pipe_mall_type;
 
 	/* For SubVP + VBLANK/DRR cases, we assume there can only be
 	 * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK
@@ -896,6 +918,7 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
 	 */
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		pipe = &context->res_ctx.pipe_ctx[i];
+		pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 		// We check for master pipe, but it shouldn't matter since we only need
 		// the pipe for timing info (stream should be same for any pipe splits)
@@ -903,18 +926,19 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
 				!resource_is_pipe_type(pipe, DPP_PIPE))
 			continue;
 
-		if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+		if (!found && pipe_mall_type == SUBVP_NONE) {
 			// Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe).
 			vblank_index = i;
 			found = true;
 		}
 
-		if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN)
 			subvp_pipe = pipe;
 	}
 	if (found) {
+		phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
 		main_timing = &subvp_pipe->stream->timing;
-		phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+		phantom_timing = &phantom_stream->timing;
 		vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing;
 		// Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe
 		// Also include the prefetch end to mallstart delay time
@@ -969,7 +993,7 @@ static bool subvp_subvp_admissable(struct dc *dc,
 			continue;
 
 		if (pipe->plane_state && !pipe->top_pipe &&
-				pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
 			refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
 				pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
 			refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
@@ -1018,23 +1042,23 @@ static bool subvp_validate_static_schedulability(struct dc *dc,
 
 	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
 		if (!pipe->stream)
 			continue;
 
 		if (pipe->plane_state && !pipe->top_pipe) {
-			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+			if (pipe_mall_type == SUBVP_MAIN)
 				subvp_count++;
-			if (pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			if (pipe_mall_type == SUBVP_NONE)
 				non_subvp_pipes++;
-			}
 		}
 
 		// Count how many planes that aren't SubVP/phantom are capable of VACTIVE
 		// switching (SubVP + VACTIVE unsupported). In situations where we force
 		// SubVP for a VACTIVE plane, we don't want to increment the vactive_count.
 		if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vlevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] > 0 &&
-		    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+				pipe_mall_type == SUBVP_NONE) {
 			vactive_count++;
 		}
 		pipe_idx++;
@@ -1070,7 +1094,7 @@ static void assign_subvp_index(struct dc *dc, struct dc_state *context)
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
 		if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) &&
-				pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) {
+				dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) {
 			pipe_ctx->subvp_index = index++;
 		} else {
 			pipe_ctx->subvp_index = 0;
@@ -1192,13 +1216,16 @@ static bool update_pipe_slice_table_with_split_flags(
 	 */
 	struct pipe_ctx *pipe;
 	bool odm;
-	int i;
+	int dc_pipe_idx, dml_pipe_idx = 0;
 	bool updated = false;
 
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		pipe = &context->res_ctx.pipe_ctx[i];
+	for (dc_pipe_idx = 0;
+			dc_pipe_idx < dc->res_pool->pipe_count; dc_pipe_idx++) {
+		pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx];
+		if (resource_is_pipe_type(pipe, FREE_PIPE))
+			continue;
 
-		if (merge[i]) {
+		if (merge[dc_pipe_idx]) {
 			if (resource_is_pipe_type(pipe, OPP_HEAD))
 				/* merging OPP head means reducing ODM slice
 				 * count by 1
@@ -1213,17 +1240,18 @@ static bool update_pipe_slice_table_with_split_flags(
 			updated = true;
 		}
 
-		if (split[i]) {
-			odm = vba->ODMCombineEnabled[vba->pipe_plane[i]] !=
+		if (split[dc_pipe_idx]) {
+			odm = vba->ODMCombineEnabled[vba->pipe_plane[dml_pipe_idx]] !=
 					dm_odm_combine_mode_disabled;
 			if (odm && resource_is_pipe_type(pipe, OPP_HEAD))
 				update_slice_table_for_stream(
-						table, pipe->stream, split[i] - 1);
+						table, pipe->stream, split[dc_pipe_idx] - 1);
 			else if (!odm && resource_is_pipe_type(pipe, DPP_PIPE))
 				update_slice_table_for_plane(table, pipe,
-						pipe->plane_state, split[i] - 1);
+						pipe->plane_state, split[dc_pipe_idx] - 1);
 			updated = true;
 		}
+		dml_pipe_idx++;
 	}
 	return updated;
 }
@@ -1233,15 +1261,11 @@ static void update_pipes_with_slice_table(struct dc *dc, struct dc_state *contex
 {
 	int i;
 
-	for (i = 0; i < table->odm_combine_count; i++) {
+	for (i = 0; i < table->odm_combine_count; i++)
 		resource_update_pipes_for_stream_with_slice_count(context,
 				dc->current_state, dc->res_pool,
 				table->odm_combines[i].stream,
 				table->odm_combines[i].slice_count);
-		/* TODO: move this into the function above */
-		dcn20_build_mapped_resource(dc, context,
-				table->odm_combines[i].stream);
-	}
 
 	for (i = 0; i < table->mpc_combine_count; i++)
 		resource_update_pipes_for_plane_with_slice_count(context,
@@ -1408,6 +1432,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 	unsigned int dc_pipe_idx = 0;
 	int i = 0;
 	bool found_supported_config = false;
+	int vlevel_temp = 0;
 
 	dc_assert_fp_enabled();
 
@@ -1440,13 +1465,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 	 */
 	if (!dc->debug.force_disable_subvp && !dc->caps.dmub_caps.gecc_enable && dcn32_all_pipes_have_stream_and_plane(dc, context) &&
 	    !dcn32_mpo_in_use(context) && !dcn32_any_surfaces_rotated(dc, context) && !is_test_pattern_enabled(context) &&
-		(*vlevel == context->bw_ctx.dml.soc.num_states ||
+		(*vlevel == context->bw_ctx.dml.soc.num_states || (vba->DRAMSpeedPerState[*vlevel] != vba->DRAMSpeedPerState[0] &&
+				vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported) ||
 	    vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported ||
 	    dc->debug.force_subvp_mclk_switch)) {
 
 		dcn32_merge_pipes_for_subvp(dc, context);
 		memset(merge, 0, MAX_PIPES * sizeof(bool));
 
+		vlevel_temp = *vlevel;
 		/* to re-initialize viewport after the pipe merge */
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
@@ -1515,10 +1542,14 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 			}
 		}
 
+		if (vba->DRAMSpeedPerState[*vlevel] >= vba->DRAMSpeedPerState[vlevel_temp])
+			found_supported_config = false;
+
 		// If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
 		// remove phantom pipes and repopulate dml pipes
 		if (!found_supported_config) {
-			dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
+			dc_state_remove_phantom_streams_and_planes(dc, context);
+			dc_state_release_phantom_streams_and_planes(dc, context);
 			vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported;
 			*pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
 
@@ -1670,7 +1701,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
 		pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
 				pipe_idx);
 
-		if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) {
 			// Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
 			context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
 			context->res_ctx.pipe_ctx[i].unbounded_req = false;
@@ -1702,7 +1733,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
 				context->res_ctx.pipe_ctx[i].plane_state != context->res_ctx.pipe_ctx[i].top_pipe->plane_state) &&
 				context->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) {
 			/* SS: all active surfaces stored in MALL */
-			if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type != SUBVP_PHANTOM) {
+			if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) != SUBVP_PHANTOM) {
 				context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes;
 
 				if (context->res_ctx.pipe_ctx[i].stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) {
@@ -1916,7 +1947,8 @@ bool dcn32_internal_validate_bw(struct dc *dc,
 		return false;
 
 	// For each full update, remove all existing phantom pipes first
-	dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate);
+	dc_state_remove_phantom_streams_and_planes(dc, context);
+	dc_state_release_phantom_streams_and_planes(dc, context);
 
 	dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
 
@@ -2178,6 +2210,7 @@ bool dcn32_internal_validate_bw(struct dc *dc,
 		int i;
 
 		pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+		dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes);
 
 		/* repopulate_pipes = 1 means the pipes were either split or merged. In this case
 		 * we have to re-calculate the DET allocation and run through DML once more to
@@ -2186,7 +2219,9 @@ bool dcn32_internal_validate_bw(struct dc *dc,
 		 * */
 		context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
 					dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
+
 		vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+
 		if (vlevel == context->bw_ctx.dml.soc.num_states) {
 			/* failed after DET size changes */
 			goto validate_fail;
@@ -2231,13 +2266,14 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	int i, pipe_idx, vlevel_temp = 0;
 	double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz;
 	double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+	double dram_speed_from_validation = context->bw_ctx.dml.vba.DRAMSpeed;
 	double dcfclk_from_fw_based_mclk_switching = dcfclk_from_validation;
 	bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] !=
 			dm_dram_clock_change_unsupported;
 	unsigned int dummy_latency_index = 0;
 	int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
 	unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
-	bool subvp_in_use = dcn32_subvp_in_use(dc, context);
+	bool subvp_active = resource_subvp_in_use(dc, context);
 	unsigned int min_dram_speed_mts_margin;
 	bool need_fclk_lat_as_dummy = false;
 	bool is_subvp_p_drr = false;
@@ -2246,7 +2282,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	dc_assert_fp_enabled();
 
 	/* need to find dummy latency index for subvp */
-	if (subvp_in_use) {
+	if (subvp_active) {
 		/* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK */
 		if (!pstate_en) {
 			context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
@@ -2418,7 +2454,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	}
 
 	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
-		min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+		min_dram_speed_mts = dram_speed_from_validation;
 		min_dram_speed_mts_margin = 160;
 
 		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
@@ -2432,7 +2468,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 				dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16;
 		}
 
-		if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) {
+		if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_active) {
 			/* find largest table entry that is lower than dram speed,
 			 * but lower than DPM0 still uses DPM0
 			 */
@@ -3294,25 +3330,24 @@ bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe)
 {
 	bool allow = false;
 	uint32_t refresh_rate = 0;
+	uint32_t min_refresh = subvp_active_margin_list.min_refresh;
+	uint32_t max_refresh = subvp_active_margin_list.max_refresh;
+	uint32_t i;
 
-	/* Allow subvp on displays that have active margin for 2560x1440@60hz displays
-	 * only for now. There must be no scaling as well.
-	 *
-	 * For now we only enable on 2560x1440@60hz displays to enable 4K60 + 1440p60 configs
-	 * for p-state switching.
-	 */
-	if (pipe->stream && pipe->plane_state) {
-		refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 +
-						pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1)
-						/ (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total);
-		if (pipe->stream->timing.v_addressable == 1440 &&
-				pipe->stream->timing.h_addressable == 2560 &&
-				refresh_rate >= 55 && refresh_rate <= 65 &&
-				pipe->plane_state->src_rect.height == 1440 &&
-				pipe->plane_state->src_rect.width == 2560 &&
-				pipe->plane_state->dst_rect.height == 1440 &&
-				pipe->plane_state->dst_rect.width == 2560)
+	for (i = 0; i < SUBVP_ACTIVE_MARGIN_LIST_LEN; i++) {
+		uint32_t width = subvp_active_margin_list.res[i].width;
+		uint32_t height = subvp_active_margin_list.res[i].height;
+
+		refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
+			pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
+		refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
+		refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
+
+		if (refresh_rate >= min_refresh && refresh_rate <= max_refresh &&
+				dcn32_check_native_scaling_for_res(pipe, width, height)) {
 			allow = true;
+			break;
+		}
 	}
 	return allow;
 }
@@ -3431,7 +3466,15 @@ void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *co
 	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
 		const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (!pipe->stream)
+		/* In DCN32/321, FPO uses per-pipe P-State force.
+		 * If there's no planes, HUBP is power gated and
+		 * therefore programming UCLK_PSTATE_FORCE does
+		 * nothing (P-State will always be asserted naturally
+		 * on a pipe that has HUBP power gated. Therefore we
+		 * only want to enable FPO if the FPO pipe has both
+		 * a stream and a plane.
+		 */
+		if (!pipe->stream || !pipe->plane_state)
 			continue;
 
 		if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) {
@@ -3485,7 +3528,7 @@ void dcn32_set_clock_limits(const struct _vcs_dpi_soc_bounding_box_st *soc_bb)
 void dcn32_override_min_req_memclk(struct dc *dc, struct dc_state *context)
 {
 	// WA: restrict FPO and SubVP to use first non-strobe mode (DCN32 BW issue)
-	if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dcn32_subvp_in_use(dc, context)) &&
+	if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || resource_subvp_in_use(dc, context)) &&
 			dc->dml.soc.num_chans <= 8) {
 		int num_mclk_levels = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index cbdfb762c10c..6c84b0fa40f4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -813,6 +813,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 					(v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ ||
 						v->DCFCLKPerState[mode_lib->vba.VoltageLevel] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ?
 							mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
+					mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false,
+
 					/* Output */
 					&v->DSTXAfterScaler[k],
 					&v->DSTYAfterScaler[k],
@@ -3317,6 +3319,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							v->SwathHeightCThisState[k], v->TWait,
 							(v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKState[i][j] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ?
 									mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
+							mode_lib->vba.PrefetchModePerState[i][j] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false,
 
 							/* Output */
 							&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index d940dfa5ae43..80fccd4999a5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -3423,6 +3423,7 @@ bool dml32_CalculatePrefetchSchedule(
 		unsigned int SwathHeightC,
 		double TWait,
 		double TPreReq,
+		bool ExtendPrefetchIfPossible,
 		/* Output */
 		double   *DSTXAfterScaler,
 		double   *DSTYAfterScaler,
@@ -3892,12 +3893,32 @@ bool dml32_CalculatePrefetchSchedule(
 			/* Clamp to oto for bandwidth calculation */
 			LinesForPrefetchBandwidth = dst_y_prefetch_oto;
 		} else {
-			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
-			TimeForFetchingMetaPTE = Tvm_equ;
-			TimeForFetchingRowInVBlank = Tr0_equ;
-			*PrefetchBandwidth = prefetch_bw_equ;
-			/* Clamp to equ for bandwidth calculation */
-			LinesForPrefetchBandwidth = dst_y_prefetch_equ;
+			/* For mode programming we want to extend the prefetch as much as possible
+			 * (up to oto, or as long as we can for equ) if we're not already applying
+			 * the 60us prefetch requirement. This is to avoid intermittent underflow
+			 * issues during prefetch.
+			 *
+			 * The prefetch extension is applied under the following scenarios:
+			 * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
+			 * 2. We're using subvp or drr methods of p-state switch, in which case we
+			 *    we don't care if prefetch takes up more of the blanking time
+			 *
+			 * Mode programming typically chooses the smallest prefetch time possible
+			 * (i.e. highest bandwidth during prefetch) presumably to create margin between
+			 * p-states / c-states that happen in vblank and prefetch. Therefore we only
+			 * apply this prefetch extension when p-state in vblank is not required (UCLK
+			 * p-states take up the most vblank time).
+			 */
+			if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
+				MyError = true;
+			} else {
+				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
+				TimeForFetchingMetaPTE = Tvm_equ;
+				TimeForFetchingRowInVBlank = Tr0_equ;
+				*PrefetchBandwidth = prefetch_bw_equ;
+				/* Clamp to equ for bandwidth calculation */
+				LinesForPrefetchBandwidth = dst_y_prefetch_equ;
+			}
 		}
 
 		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
index 592d174df6c6..5d34735df83d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -747,6 +747,7 @@ bool dml32_CalculatePrefetchSchedule(
 		unsigned int SwathHeightC,
 		double TWait,
 		double TPreReq,
+		bool ExtendPrefetchIfPossible,
 		/* Output */
 		double   *DSTXAfterScaler,
 		double   *DSTYAfterScaler,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index a5fe523668e9..3d12dabd39e4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -124,7 +124,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 600.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 186.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 		{
 			.state = 1,
@@ -133,7 +133,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 810.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 209.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 		{
 			.state = 2,
@@ -142,7 +142,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 810.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 209.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 		{
 			.state = 3,
@@ -151,7 +151,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 810.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 371.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 		{
 			.state = 4,
@@ -160,14 +160,14 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 			.phyclk_mhz = 810.0,
 			.phyclk_d18_mhz = 667.0,
 			.dscclk_mhz = 417.0,
-			.dtbclk_mhz = 625.0,
+			.dtbclk_mhz = 600.0,
 		},
 	},
 	.num_states = 5,
-	.sr_exit_time_us = 9.0,
-	.sr_enter_plus_exit_time_us = 11.0,
-	.sr_exit_z8_time_us = 50.0, /*changed from 442.0*/
-	.sr_enter_plus_exit_z8_time_us = 50.0,/*changed from 560.0*/
+	.sr_exit_time_us = 14.0,
+	.sr_enter_plus_exit_time_us = 16.0,
+	.sr_exit_z8_time_us = 525.0,
+	.sr_enter_plus_exit_z8_time_us = 715.0,
 	.fclk_change_latency_us = 20.0,
 	.usr_retraining_latency_us = 2,
 	.writeback_latency_us = 12.0,
@@ -326,9 +326,74 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc,
 		dcn3_5_soc.dram_clock_change_latency_us =
 			dc->debug.dram_clock_change_latency_ns / 1000.0;
 	}
+
+	if (dc->bb_overrides.dram_clock_change_latency_ns > 0)
+		dcn3_5_soc.dram_clock_change_latency_us =
+			dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_exit_time_ns > 0)
+		dcn3_5_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_enter_plus_exit_time_ns > 0)
+		dcn3_5_soc.sr_enter_plus_exit_time_us =
+			dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_exit_z8_time_ns > 0)
+		dcn3_5_soc.sr_exit_z8_time_us = dc->bb_overrides.sr_exit_z8_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns > 0)
+		dcn3_5_soc.sr_enter_plus_exit_z8_time_us =
+			dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0;
+
 	/*temp till dml2 fully work without dml1*/
 	dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip,
 				DML_PROJECT_DCN31);
+
+	/*copy to dml2, before dml2_create*/
+	if (clk_table->num_entries > 2) {
+
+		for (i = 0; i < clk_table->num_entries; i++) {
+			dc->dml2_options.bbox_overrides.clks_table.num_states =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz =
+				clock_limits[i].dcfclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz =
+				clock_limits[i].fabricclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz =
+				clock_limits[i].dispclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz =
+				clock_limits[i].dppclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz =
+				clock_limits[i].socclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
+				clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+				clock_limits[i].dtbclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
+				clk_table->num_entries;
+			dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+				clk_table->num_entries;
+		}
+	}
+
+	/* Update latency values */
+	dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_5_soc.dram_clock_change_latency_us;
+
+	dc->dml2_options.bbox_overrides.sr_exit_latency_us = dcn3_5_soc.sr_exit_time_us;
+	dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = dcn3_5_soc.sr_enter_plus_exit_time_us;
+
+	dc->dml2_options.bbox_overrides.sr_exit_z8_time_us = dcn3_5_soc.sr_exit_z8_time_us;
+	dc->dml2_options.bbox_overrides.sr_enter_plus_exit_z8_time_us = dcn3_5_soc.sr_enter_plus_exit_z8_time_us;
 }
 
 static bool is_dual_plane(enum surface_pixel_format format)
@@ -507,3 +572,37 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc,
 
 	return pipe_cnt;
 }
+
+void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context)
+{
+	enum dcn_zstate_support_state support = DCN_ZSTATE_SUPPORT_DISALLOW;
+	unsigned int i, plane_count = 0;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		if (context->res_ctx.pipe_ctx[i].plane_state)
+			plane_count++;
+	}
+
+	if (plane_count == 0) {
+		support = DCN_ZSTATE_SUPPORT_ALLOW;
+	} else if (plane_count == 1 && context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
+		struct dc_link *link = context->streams[0]->sink->link;
+		bool is_pwrseq0 = link && link->link_index == 0;
+		bool is_psr1 = link && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr;
+		int minmum_z8_residency =
+			dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
+		bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency;
+		int minmum_z10_residency =
+			dc->debug.minimum_z10_residency_time > 0 ? dc->debug.minimum_z10_residency_time : 5000;
+		bool allow_z10 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z10_residency;
+
+		if (is_pwrseq0 && allow_z10)
+			support = DCN_ZSTATE_SUPPORT_ALLOW;
+		else if (is_pwrseq0 && is_psr1)
+			support = allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
+		else if (allow_z8)
+			support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
+	}
+
+	context->bw_ctx.bw.dcn.clk.zstate_support = support;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h
index e8d5a170893e..067480fc3691 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h
@@ -39,4 +39,6 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc,
 					      display_e2e_pipe_params_st *pipes,
 					      bool fast_validate);
 
+void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
index 510be909cd75..b95bf27f2fe2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -5420,7 +5420,7 @@ static void CalculateOutputLink(
 					*OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
 												OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
 
-					if (OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
 						*RequiresDSC = true;
 						LinkDSCEnable = true;
 						*OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
@@ -9447,12 +9447,12 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 
 		// Output
 		CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark
-		CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j];
+		CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[0];
 		CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0][0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
 		CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[]
-		CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j];
+		CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[0];
 		CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported
-		CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j];
+		CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[0];
 
 		CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
 			&mode_lib->scratch,
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
index 1a2b24cc6b61..0baf39d64a2d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
@@ -772,18 +772,29 @@ static unsigned int get_mpc_factor(struct dml2_context *ctx,
 		const struct dc_state *state,
 		const struct dml_display_cfg_st *disp_cfg,
 		struct dml2_dml_to_dc_pipe_mapping *mapping,
-		const struct dc_stream_status *status, unsigned int stream_id,
+		const struct dc_stream_status *status,
+		const struct dc_stream_state *stream,
 		int plane_idx)
 {
 	unsigned int plane_id;
 	unsigned int cfg_idx;
+	unsigned int mpc_factor;
 
-	get_plane_id(ctx, state, status->plane_states[plane_idx], stream_id, plane_idx, &plane_id);
+	get_plane_id(ctx, state, status->plane_states[plane_idx],
+			stream->stream_id, plane_idx, &plane_id);
 	cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id);
-	if (ctx->architecture == dml2_architecture_20)
-		return (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx];
-	ASSERT(false);
-	return 1;
+	if (ctx->architecture == dml2_architecture_20) {
+		mpc_factor = (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx];
+	} else {
+		mpc_factor = 1;
+		ASSERT(false);
+	}
+
+	/* For stereo timings, we need to pipe split */
+	if (dml2_is_stereo_timing(stream))
+		mpc_factor = 2;
+
+	return mpc_factor;
 }
 
 static unsigned int get_odm_factor(
@@ -820,14 +831,13 @@ static void populate_mpc_factors_for_stream(
 		unsigned int mpc_factors[MAX_PIPES])
 {
 	const struct dc_stream_status *status = &state->stream_status[stream_idx];
-	unsigned int stream_id = state->streams[stream_idx]->stream_id;
 	int i;
 
 	for (i = 0; i < status->plane_count; i++)
 		if (odm_factor == 1)
 			mpc_factors[i] = get_mpc_factor(
 					ctx, state, disp_cfg, mapping, status,
-					stream_id, i);
+					state->streams[stream_idx], i);
 		else
 			mpc_factors[i] = 1;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h
index e85866db80ff..7ca7f2a743c2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h
@@ -38,5 +38,6 @@
 #include "core_types.h"
 #include "dsc.h"
 #include "clk_mgr.h"
+#include "dc_state_priv.h"
 
 #endif //__DML2_DC_TYPES_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
index 32f8a43af3d6..282d70e2b18a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
@@ -51,7 +51,7 @@ unsigned int dml2_helper_calculate_num_ways_for_subvp(struct dml2_context *ctx,
 
 		// Find the phantom pipes
 		if (pipe->stream && pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe &&
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
 			mblk_width = ctx->config.mall_cfg.mblk_width_pixels;
 			mblk_height = bytes_per_pixel == 4 ? mblk_width = ctx->config.mall_cfg.mblk_height_4bpe_pixels : ctx->config.mall_cfg.mblk_height_8bpe_pixels;
@@ -253,7 +253,7 @@ static bool assign_subvp_pipe(struct dml2_context *ctx, struct dc_state *context
 		 *   to combine this with SubVP can cause issues with the scheduling).
 		 */
 		if (pipe->plane_state && !pipe->top_pipe &&
-				pipe->stream->mall_stream_config.type == SUBVP_NONE && refresh_rate < 120 &&
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_NONE && refresh_rate < 120 &&
 				vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) {
 			while (pipe) {
 				num_pipes++;
@@ -317,7 +317,7 @@ static bool enough_pipes_for_subvp(struct dml2_context *ctx, struct dc_state *st
 
 		// Find the minimum pipe split count for non SubVP pipes
 		if (pipe->stream && !pipe->top_pipe &&
-		    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_NONE) {
 			split_cnt = 0;
 			while (pipe) {
 				split_cnt++;
@@ -372,8 +372,8 @@ static bool subvp_subvp_schedulable(struct dml2_context *ctx, struct dc_state *c
 		 * and also to store the two main SubVP pipe pointers in subvp_pipes[2].
 		 */
 		if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
-		    pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
-			phantom = pipe->stream->mall_stream_config.paired_stream;
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+			phantom = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream);
 			microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) +
 					phantom->timing.v_addressable;
 
@@ -435,6 +435,7 @@ bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context
 	struct pipe_ctx *pipe = NULL;
 	struct dc_crtc_timing *main_timing = NULL;
 	struct dc_crtc_timing *phantom_timing = NULL;
+	struct dc_stream_state *phantom_stream;
 	int16_t prefetch_us = 0;
 	int16_t mall_region_us = 0;
 	int16_t drr_frame_us = 0;	// nominal frame time
@@ -453,12 +454,13 @@ bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context
 			continue;
 
 		// Find the SubVP pipe
-		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
 			break;
 	}
 
+	phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream);
 	main_timing = &pipe->stream->timing;
-	phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing;
+	phantom_timing = &phantom_stream->timing;
 	prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
 			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
 			ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us;
@@ -519,6 +521,8 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state *
 	struct dc_crtc_timing *main_timing = NULL;
 	struct dc_crtc_timing *phantom_timing = NULL;
 	struct dc_crtc_timing *vblank_timing = NULL;
+	struct dc_stream_state *phantom_stream;
+	enum mall_stream_type pipe_mall_type;
 
 	/* For SubVP + VBLANK/DRR cases, we assume there can only be
 	 * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK
@@ -528,19 +532,20 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state *
 	 */
 	for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
 		pipe = &context->res_ctx.pipe_ctx[i];
+		pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe);
 
 		// We check for master pipe, but it shouldn't matter since we only need
 		// the pipe for timing info (stream should be same for any pipe splits)
 		if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe)
 			continue;
 
-		if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+		if (!found && pipe_mall_type == SUBVP_NONE) {
 			// Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe).
 			vblank_index = i;
 			found = true;
 		}
 
-		if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+		if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN)
 			subvp_pipe = pipe;
 	}
 	// Use ignore_msa_timing_param flag to identify as DRR
@@ -548,8 +553,9 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state *
 		// SUBVP + DRR case
 		schedulable = dml2_svp_drr_schedulable(ctx, context, &context->res_ctx.pipe_ctx[vblank_index].stream->timing);
 	} else if (found) {
+		phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, subvp_pipe->stream);
 		main_timing = &subvp_pipe->stream->timing;
-		phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+		phantom_timing = &phantom_stream->timing;
 		vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing;
 		// Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe
 		// Also include the prefetch end to mallstart delay time
@@ -602,19 +608,20 @@ bool dml2_svp_validate_static_schedulability(struct dml2_context *ctx, struct dc
 
 	for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		enum mall_stream_type pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe);
 
 		if (!pipe->stream)
 			continue;
 
 		if (pipe->plane_state && !pipe->top_pipe &&
-				pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+				pipe_mall_type == SUBVP_MAIN)
 			subvp_count++;
 
 		// Count how many planes that aren't SubVP/phantom are capable of VACTIVE
 		// switching (SubVP + VACTIVE unsupported). In situations where we force
 		// SubVP for a VACTIVE plane, we don't want to increment the vactive_count.
 		if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 &&
-		    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+		    pipe_mall_type == SUBVP_NONE) {
 			vactive_count++;
 		}
 		pipe_idx++;
@@ -708,14 +715,10 @@ static void set_phantom_stream_timing(struct dml2_context *ctx, struct dc_state
 static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int dc_pipe_idx, unsigned int svp_height, unsigned int vstartup)
 {
 	struct pipe_ctx *ref_pipe = &state->res_ctx.pipe_ctx[dc_pipe_idx];
-	struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_stream_for_sink(ref_pipe->stream->sink);
-
-	phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
-	phantom_stream->dpms_off = true;
-	phantom_stream->mall_stream_config.type = SUBVP_PHANTOM;
-	phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream;
-	ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN;
-	ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream;
+	struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_phantom_stream(
+			ctx->config.svp_pstate.callbacks.dc,
+			state,
+			ref_pipe->stream);
 
 	/* stream has limited viewport and small timing */
 	memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing));
@@ -723,7 +726,10 @@ static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, s
 	memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst));
 	set_phantom_stream_timing(ctx, state, ref_pipe, phantom_stream, dc_pipe_idx, svp_height, vstartup);
 
-	ctx->config.svp_pstate.callbacks.add_stream_to_ctx(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream);
+	ctx->config.svp_pstate.callbacks.add_phantom_stream(ctx->config.svp_pstate.callbacks.dc,
+			state,
+			phantom_stream,
+			ref_pipe->stream);
 	return phantom_stream;
 }
 
@@ -740,7 +746,10 @@ static void enable_phantom_plane(struct dml2_context *ctx,
 		if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) {
 			phantom_plane = prev_phantom_plane;
 		} else {
-			phantom_plane = ctx->config.svp_pstate.callbacks.create_plane(ctx->config.svp_pstate.callbacks.dc);
+			phantom_plane = ctx->config.svp_pstate.callbacks.create_phantom_plane(
+					ctx->config.svp_pstate.callbacks.dc,
+					state,
+					curr_pipe->plane_state);
 		}
 
 		memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address));
@@ -763,9 +772,7 @@ static void enable_phantom_plane(struct dml2_context *ctx,
 		phantom_plane->clip_rect.y = 0;
 		phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable;
 
-		phantom_plane->is_phantom = true;
-
-		ctx->config.svp_pstate.callbacks.add_plane_to_context(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state);
+		ctx->config.svp_pstate.callbacks.add_phantom_plane(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state);
 
 		curr_pipe = curr_pipe->bottom_pipe;
 		prev_phantom_plane = phantom_plane;
@@ -790,7 +797,7 @@ static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_
 		// We determine which phantom pipes were added by comparing with
 		// the phantom stream.
 		if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream &&
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
 			pipe->stream->use_dynamic_meta = false;
 			pipe->plane_state->flip_immediate = false;
 			if (!ctx->config.svp_pstate.callbacks.build_scaling_params(pipe)) {
@@ -800,7 +807,7 @@ static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_
 	}
 }
 
-static bool remove_all_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context)
+static bool remove_all_phantom_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context)
 {
 	int i, old_plane_count;
 	struct dc_stream_status *stream_status = NULL;
@@ -821,9 +828,11 @@ static bool remove_all_planes_for_stream(struct dml2_context *ctx, struct dc_str
 	for (i = 0; i < old_plane_count; i++)
 		del_planes[i] = stream_status->plane_states[i];
 
-	for (i = 0; i < old_plane_count; i++)
-		if (!ctx->config.svp_pstate.callbacks.remove_plane_from_context(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context))
+	for (i = 0; i < old_plane_count; i++) {
+		if (!ctx->config.svp_pstate.callbacks.remove_phantom_plane(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context))
 			return false;
+		ctx->config.svp_pstate.callbacks.release_phantom_plane(ctx->config.svp_pstate.callbacks.dc, context, del_planes[i]);
+	}
 
 	return true;
 }
@@ -832,35 +841,21 @@ bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state
 {
 	int i;
 	bool removed_pipe = false;
-	struct dc_plane_state *phantom_plane = NULL;
 	struct dc_stream_state *phantom_stream = NULL;
 
 	for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
 		struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
 		// build scaling params for phantom pipes
-		if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-			phantom_plane = pipe->plane_state;
+		if (pipe->plane_state && pipe->stream && ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
 			phantom_stream = pipe->stream;
 
-			remove_all_planes_for_stream(ctx, pipe->stream, state);
-			ctx->config.svp_pstate.callbacks.remove_stream_from_ctx(ctx->config.svp_pstate.callbacks.dc, state, pipe->stream);
-
-			/* Ref count is incremented on allocation and also when added to the context.
-			 * Therefore we must call release for the the phantom plane and stream once
-			 * they are removed from the ctx to finally decrement the refcount to 0 to free.
-			 */
-			ctx->config.svp_pstate.callbacks.plane_state_release(phantom_plane);
-			ctx->config.svp_pstate.callbacks.stream_release(phantom_stream);
+			remove_all_phantom_planes_for_stream(ctx, phantom_stream, state);
+			ctx->config.svp_pstate.callbacks.remove_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream);
+			ctx->config.svp_pstate.callbacks.release_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream);
 
 			removed_pipe = true;
 		}
 
-		// Clear all phantom stream info
-		if (pipe->stream) {
-			pipe->stream->mall_stream_config.type = SUBVP_NONE;
-			pipe->stream->mall_stream_config.paired_stream = NULL;
-		}
-
 		if (pipe->plane_state) {
 			pipe->plane_state->is_phantom = false;
 		}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index 75171bee6f71..fa6a93dd9629 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -341,25 +341,45 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
 		break;
 	}
 
-	/* Override from passed values, mainly for debugging purposes, if available */
-	if (dml2->config.bbox_overrides.sr_exit_latency_us) {
-		p->in_states->state_array[0].sr_exit_time_us = dml2->config.bbox_overrides.sr_exit_latency_us;
-	}
+	if (dml2->config.bbox_overrides.clks_table.num_states)
+			p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states;
+
+	/* Override from passed values, if available */
+	for (i = 0; i < p->in_states->num_states; i++) {
+		if (dml2->config.bbox_overrides.sr_exit_latency_us) {
+			p->in_states->state_array[i].sr_exit_time_us =
+				dml2->config.bbox_overrides.sr_exit_latency_us;
+		}
 
-	if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) {
-		p->in_states->state_array[0].sr_enter_plus_exit_time_us = dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us;
-	}
+		if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) {
+			p->in_states->state_array[i].sr_enter_plus_exit_time_us =
+				dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us;
+		}
 
-	if (dml2->config.bbox_overrides.urgent_latency_us) {
-		p->in_states->state_array[0].urgent_latency_pixel_data_only_us = dml2->config.bbox_overrides.urgent_latency_us;
-	}
+		if (dml2->config.bbox_overrides.sr_exit_z8_time_us) {
+			p->in_states->state_array[i].sr_exit_z8_time_us =
+				dml2->config.bbox_overrides.sr_exit_z8_time_us;
+		}
 
-	if (dml2->config.bbox_overrides.dram_clock_change_latency_us) {
-		p->in_states->state_array[0].dram_clock_change_latency_us = dml2->config.bbox_overrides.dram_clock_change_latency_us;
-	}
+		if (dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us) {
+			p->in_states->state_array[i].sr_enter_plus_exit_z8_time_us =
+				dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us;
+		}
 
-	if (dml2->config.bbox_overrides.fclk_change_latency_us) {
-		p->in_states->state_array[0].fclk_change_latency_us = dml2->config.bbox_overrides.fclk_change_latency_us;
+		if (dml2->config.bbox_overrides.urgent_latency_us) {
+			p->in_states->state_array[i].urgent_latency_pixel_data_only_us =
+				dml2->config.bbox_overrides.urgent_latency_us;
+		}
+
+		if (dml2->config.bbox_overrides.dram_clock_change_latency_us) {
+			p->in_states->state_array[i].dram_clock_change_latency_us =
+				dml2->config.bbox_overrides.dram_clock_change_latency_us;
+		}
+
+		if (dml2->config.bbox_overrides.fclk_change_latency_us) {
+			p->in_states->state_array[i].fclk_change_latency_us =
+				dml2->config.bbox_overrides.fclk_change_latency_us;
+		}
 	}
 
 	/* DCFCLK stas values are project specific */
@@ -380,7 +400,6 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
 	}
 	/* Copy clocks tables entries, if available */
 	if (dml2->config.bbox_overrides.clks_table.num_states) {
-		p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states;
 
 		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) {
 			p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz;
@@ -406,8 +425,9 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
 		}
 
 		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels; i++) {
-			p->in_states->state_array[i].dtbclk_mhz =
-				dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz;
+			if (dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz > 0)
+				p->in_states->state_array[i].dtbclk_mhz =
+					dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz;
 		}
 
 		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels; i++) {
@@ -498,8 +518,8 @@ void dml2_translate_socbb_params(const struct dc *in, struct soc_bounding_box_st
 	out->do_urgent_latency_adjustment = in_soc_params->do_urgent_latency_adjustment;
 	out->dram_channel_width_bytes = (dml_uint_t)in_soc_params->dram_channel_width_bytes;
 	out->fabric_datapath_to_dcn_data_return_bytes = (dml_uint_t)in_soc_params->fabric_datapath_to_dcn_data_return_bytes;
-	out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes * 1024;
-	out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes * 1024;
+	out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes / 1024;
+	out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes / 1024;
 	out->mall_allocated_for_dcn_mbytes = (dml_uint_t)in_soc_params->mall_allocated_for_dcn_mbytes;
 	out->max_avg_dram_bw_use_normal_percent = in_soc_params->max_avg_dram_bw_use_normal_percent;
 	out->max_avg_fabric_bw_use_normal_percent = in_soc_params->max_avg_fabric_bw_use_normal_percent;
@@ -1029,8 +1049,10 @@ static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2,
 
 void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg)
 {
-	int i = 0, j = 0;
+	int i = 0, j = 0, k = 0;
 	int disp_cfg_stream_location, disp_cfg_plane_location;
+	enum mall_stream_type stream_mall_type;
+	struct pipe_ctx *current_pipe_context;
 
 	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
 		dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] = false;
@@ -1040,14 +1062,27 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 	}
 
 	//Generally these are set by referencing our latest BB/IP params in dcn32_resource.c file
-	dml_dispcfg->plane.GPUVMEnable = true;
-	dml_dispcfg->plane.GPUVMMaxPageTableLevels = 4;
-	dml_dispcfg->plane.HostVMEnable = false;
+	dml_dispcfg->plane.GPUVMEnable = dml2->v20.dml_core_ctx.ip.gpuvm_enable;
+	dml_dispcfg->plane.GPUVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.gpuvm_max_page_table_levels;
+	dml_dispcfg->plane.HostVMEnable = dml2->v20.dml_core_ctx.ip.hostvm_enable;
+	dml_dispcfg->plane.HostVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.hostvm_max_page_table_levels;
+	if (dml2->v20.dml_core_ctx.ip.hostvm_enable)
+		dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter;
 
 	dml2_populate_pipe_to_plane_index_mapping(dml2, context);
 
 	for (i = 0; i < context->stream_count; i++) {
+		current_pipe_context = NULL;
+		for (k = 0; k < MAX_PIPES; k++) {
+			/* find one pipe allocated to this stream for the purpose of getting
+			info about the link later */
+			if (context->streams[i] == context->res_ctx.pipe_ctx[k].stream) {
+				current_pipe_context = &context->res_ctx.pipe_ctx[k];
+				break;
+			}
+		}
 		disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg);
+		stream_mall_type = dc_state_get_stream_subvp_type(context, context->streams[i]);
 
 		if (disp_cfg_stream_location < 0)
 			disp_cfg_stream_location = dml_dispcfg->num_timings++;
@@ -1055,7 +1090,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 		ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__);
 
 		populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]);
-		populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], &context->res_ctx.pipe_ctx[i]);
+		populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context);
 		switch (context->streams[i]->debug.force_odm_combine_segments) {
 		case 2:
 			dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_stream_location] = dml_odm_use_policy_combine_2to1;
@@ -1092,10 +1127,10 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 				populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]);
 				populate_dml_plane_cfg_from_plane_state(&dml_dispcfg->plane, disp_cfg_plane_location, context->stream_status[i].plane_states[j], context);
 
-				if (context->streams[i]->mall_stream_config.type == SUBVP_MAIN) {
+				if (stream_mall_type == SUBVP_MAIN) {
 					dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport;
 					dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_optimize;
-				} else if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM) {
+				} else if (stream_mall_type == SUBVP_PHANTOM) {
 					dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe;
 					dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_disable;
 					dml2->v20.dml_core_ctx.policy.ImmediateFlipRequirement[disp_cfg_plane_location] = dml_immediate_flip_not_required;
@@ -1112,7 +1147,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 
 				if (j >= 1) {
 					populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]);
-					populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], &context->res_ctx.pipe_ctx[i]);
+					populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context);
 					switch (context->streams[i]->debug.force_odm_combine_segments) {
 					case 2:
 						dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1;
@@ -1124,9 +1159,9 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
 						break;
 					}
 
-					if (context->streams[i]->mall_stream_config.type == SUBVP_MAIN)
+					if (stream_mall_type == SUBVP_MAIN)
 						dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport;
-					else if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM)
+					else if (stream_mall_type == SUBVP_PHANTOM)
 						dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe;
 
 					dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_plane_location] = context->streams[i]->stream_id;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
index 2498b8341199..1068b962d1c1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
@@ -155,8 +155,20 @@ unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, e
 
 bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx)
 {
+	if (pipe_ctx == NULL || pipe_ctx->stream == NULL)
+		return false;
+
 	/* If this assert is hit then we have a link encoder dynamic management issue */
 	ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true);
+
+	/* Count MST hubs once by treating only 1st remote sink in topology as an encoder */
+	if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) {
+		return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
+			pipe_ctx->link_res.hpo_dp_link_enc &&
+			dc_is_dp_signal(pipe_ctx->stream->signal) &&
+			(pipe_ctx->stream->link->remote_sinks[0]->sink_id == pipe_ctx->stream->sink->sink_id));
+	}
+
 	return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
 		pipe_ctx->link_res.hpo_dp_link_enc &&
 		dc_is_dp_signal(pipe_ctx->stream->signal));
@@ -275,6 +287,7 @@ static void populate_pipe_ctx_dlg_params_from_dml(struct pipe_ctx *pipe_ctx, str
 void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt)
 {
 	unsigned int dc_pipe_ctx_index, dml_pipe_idx, plane_id;
+	enum mall_stream_type pipe_mall_type;
 	bool unbounded_req_enabled = false;
 	struct dml2_calculate_rq_and_dlg_params_scratch *s = &in_ctx->v20.scratch.calculate_rq_and_dlg_params_scratch;
 
@@ -322,7 +335,8 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont
 		 */
 		populate_pipe_ctx_dlg_params_from_dml(&context->res_ctx.pipe_ctx[dc_pipe_ctx_index], &context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx);
 
-		if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		pipe_mall_type = dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[dc_pipe_ctx_index]);
+		if (pipe_mall_type == SUBVP_PHANTOM) {
 			// Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
 			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = 0;
 			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = false;
@@ -349,7 +363,7 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont
 			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state != context->res_ctx.pipe_ctx[dc_pipe_ctx_index].top_pipe->plane_state) &&
 			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].prev_odm_pipe == NULL) {
 			/* SS: all active surfaces stored in MALL */
-			if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->mall_stream_config.type != SUBVP_PHANTOM) {
+			if (pipe_mall_type != SUBVP_PHANTOM) {
 				context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes;
 			} else {
 				/* SUBVP: phantom surfaces only stored in MALL */
@@ -468,7 +482,7 @@ bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc
 	return need_recalculation;
 }
 
-bool dml2_is_stereo_timing(struct dc_stream_state *stream)
+bool dml2_is_stereo_timing(const struct dc_stream_state *stream)
 {
 	bool is_stereo = false;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
index 23b9028337d4..5842d6d3c4b6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
@@ -42,7 +42,7 @@ void dml2_copy_clocks_to_dc_state(struct dml2_dcn_clocks *out_clks, struct dc_st
 void dml2_extract_watermark_set(struct dcn_watermarks *watermark, struct display_mode_lib_st *dml_core_ctx);
 int dml2_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id);
 bool is_dtbclk_required(const struct dc *dc, struct dc_state *context);
-bool dml2_is_stereo_timing(struct dc_stream_state *stream);
+bool dml2_is_stereo_timing(const struct dc_stream_state *stream);
 
 /*
  * dml2_dc_construct_pipes - This function will determine if we need additional pipes based
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
index 8f231418870f..26307e599614 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
@@ -418,7 +418,7 @@ static int find_drr_eligible_stream(struct dc_state *display_state)
 	int i;
 
 	for (i = 0; i < display_state->stream_count; i++) {
-		if (display_state->streams[i]->mall_stream_config.type == SUBVP_NONE
+		if (dc_state_get_stream_subvp_type(display_state, display_state->streams[i]) == SUBVP_NONE
 			&& display_state->streams[i]->ignore_msa_timing_param) {
 			// Use ignore_msa_timing_param flag to identify as DRR
 			return i;
@@ -634,6 +634,8 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s
 		dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.b, &dml2->v20.dml_core_ctx);
 		memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c));
 		dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.d, &dml2->v20.dml_core_ctx);
+		//copy for deciding zstate use
+		context->bw_ctx.dml.vba.StutterPeriod = context->bw_ctx.dml2->v20.dml_core_ctx.mp.StutterPeriod;
 	}
 
 	return result;
@@ -691,10 +693,15 @@ bool dml2_validate(const struct dc *in_dc, struct dc_state *context, bool fast_v
 	return out;
 }
 
+static inline struct dml2_context *dml2_allocate_memory(void)
+{
+	return (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL);
+}
+
 bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2)
 {
 	// Allocate Mode Lib Ctx
-	*dml2 = (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL);
+	*dml2 = dml2_allocate_memory();
 
 	if (!(*dml2))
 		return false;
@@ -745,3 +752,25 @@ void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2,
 	*fclk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0];
 	*dram_clk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.DRAMClockChangeSupport[0];
 }
+
+void dml2_copy(struct dml2_context *dst_dml2,
+	struct dml2_context *src_dml2)
+{
+	/* copy Mode Lib Ctx */
+	memcpy(dst_dml2, src_dml2, sizeof(struct dml2_context));
+}
+
+bool dml2_create_copy(struct dml2_context **dst_dml2,
+	struct dml2_context *src_dml2)
+{
+	/* Allocate Mode Lib Ctx */
+	*dst_dml2 = dml2_allocate_memory();
+
+	if (!(*dst_dml2))
+		return false;
+
+	/* copy Mode Lib Ctx */
+	dml2_copy(*dst_dml2, src_dml2);
+
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
index 317f90776d97..ee0eb184eb6d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
@@ -93,15 +93,34 @@ struct dml2_dc_callbacks {
 struct dml2_dc_svp_callbacks {
 	struct dc *dc;
 	bool (*build_scaling_params)(struct pipe_ctx *pipe_ctx);
-	struct dc_stream_state* (*create_stream_for_sink)(struct dc_sink *dc_sink_data);
-	struct dc_plane_state* (*create_plane)(struct dc *dc);
-	enum dc_status (*add_stream_to_ctx)(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream);
-	bool (*add_plane_to_context)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context);
-	bool (*remove_plane_from_context)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context);
-	enum dc_status (*remove_stream_from_ctx)(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *stream);
-	void (*plane_state_release)(struct dc_plane_state *plane_state);
-	void (*stream_release)(struct dc_stream_state *stream);
+	struct dc_stream_state* (*create_phantom_stream)(const struct dc *dc,
+			struct dc_state *state,
+			struct dc_stream_state *main_stream);
+	struct dc_plane_state* (*create_phantom_plane)(struct dc *dc,
+			struct dc_state *state,
+			struct dc_plane_state *main_plane);
+	enum dc_status (*add_phantom_stream)(struct dc *dc,
+			struct dc_state *state,
+			struct dc_stream_state *phantom_stream,
+			struct dc_stream_state *main_stream);
+	bool (*add_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context);
+	bool (*remove_phantom_plane)(const struct dc *dc,
+			struct dc_stream_state *stream,
+			struct dc_plane_state *plane_state,
+			struct dc_state *context);
+	enum dc_status (*remove_phantom_stream)(struct dc *dc,
+			struct dc_state *state,
+			struct dc_stream_state *stream);
+	void (*release_phantom_plane)(const struct dc *dc,
+			struct dc_state *state,
+			struct dc_plane_state *plane);
+	void (*release_phantom_stream)(const struct dc *dc,
+			struct dc_state *state,
+			struct dc_stream_state *stream);
 	void (*release_dsc)(struct resource_context *res_ctx, const struct resource_pool *pool, struct display_stream_compressor **dsc);
+	enum mall_stream_type (*get_pipe_subvp_type)(const struct dc_state *state, const struct pipe_ctx *pipe_ctx);
+	enum mall_stream_type (*get_stream_subvp_type)(const struct dc_state *state, const struct dc_stream_state *stream);
+	struct dc_stream_state *(*get_paired_subvp_stream)(const struct dc_state *state, const struct dc_stream_state *stream);
 };
 
 struct dml2_clks_table_entry {
@@ -139,6 +158,8 @@ struct dml2_soc_bbox_overrides {
 	double urgent_latency_us;
 	double sr_exit_latency_us;
 	double sr_enter_plus_exit_latency_us;
+	double sr_exit_z8_time_us;
+	double sr_enter_plus_exit_z8_time_us;
 	double dram_clock_change_latency_us;
 	double fclk_change_latency_us;
 	unsigned int dram_num_chan;
@@ -189,6 +210,10 @@ bool dml2_create(const struct dc *in_dc,
 				 struct dml2_context **dml2);
 
 void dml2_destroy(struct dml2_context *dml2);
+void dml2_copy(struct dml2_context *dst_dml2,
+	struct dml2_context *src_dml2);
+bool dml2_create_copy(struct dml2_context **dst_dml2,
+	struct dml2_context *src_dml2);
 
 /*
  * dml2_validate - Determines if a display configuration is supported or not.
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
index a2537229ee88..b183ba5a692e 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
@@ -1,8 +1,34 @@
 # SPDX-License-Identifier: MIT
 #
 # Makefile for the 'dsc' sub-component of DAL.
+
+ifdef CONFIG_DRM_AMD_DC_FP
+
+###############################################################################
+# DCN20
+###############################################################################
+DSC_DCN20 = dcn20_dsc.o
+
+AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/dsc/dcn20/,$(DSC_DCN20))
+
+
+
+
+###############################################################################
+# DCN35
+###############################################################################
+
+DSC_DCN35 = dcn35_dsc.o
+
+AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/dsc/dcn35/,$(DSC_DCN35))
+
+
+
+endif
+
 DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o
 
 AMD_DAL_DSC = $(addprefix $(AMDDALPATH)/dc/dsc/,$(DSC))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DSC)
+
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
index e8b5f17beb96..0df6c55eb326 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
@@ -331,8 +331,9 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
 		int buff_block_size;
 		int buff_size;
 
-		if (!dsc_buff_block_size_from_dpcd(dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT],
-										   &buff_block_size))
+		if (!dsc_buff_block_size_from_dpcd(
+				dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT] & 0x03,
+				&buff_block_size))
 			return false;
 
 		buff_size = dpcd_dsc_basic_data[DP_DSC_RC_BUF_SIZE - DP_DSC_SUPPORT] + 1;
@@ -357,10 +358,15 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
 
 	{
 		int dpcd_throughput = dpcd_dsc_basic_data[DP_DSC_PEAK_THROUGHPUT - DP_DSC_SUPPORT];
+		int dsc_throughput_granular_delta;
+
+		dsc_throughput_granular_delta = dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT] >> 3;
+		dsc_throughput_granular_delta *= 2;
 
 		if (!dsc_throughput_from_dpcd(dpcd_throughput & DP_DSC_THROUGHPUT_MODE_0_MASK,
 									  &dsc_sink_caps->throughput_mode_0_mps))
 			return false;
+		dsc_sink_caps->throughput_mode_0_mps += dsc_throughput_granular_delta;
 
 		dpcd_throughput = (dpcd_throughput & DP_DSC_THROUGHPUT_MODE_1_MASK) >> DP_DSC_THROUGHPUT_MODE_1_SHIFT;
 		if (!dsc_throughput_from_dpcd(dpcd_throughput, &dsc_sink_caps->throughput_mode_1_mps))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
index c9ae2d8f0096..c9ae2d8f0096 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h
index ba869387c3c5..ba869387c3c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c
index 71d2dff9986d..71d2dff9986d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h
index 133ad38842cc..133ad38842cc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h
index 4b27f29d0d80..4b27f29d0d80 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/Makefile
index bccd46bd1815..254136f8e3f9 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/hwss/Makefile
@@ -78,7 +78,7 @@ ifdef CONFIG_DRM_AMD_DC_FP
 # DCN
 ###############################################################################
 
-HWSS_DCN10 = dcn10_hwseq.o
+HWSS_DCN10 = dcn10_hwseq.o dcn10_init.o
 
 AMD_DAL_HWSS_DCN10 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn10/,$(HWSS_DCN10))
 
@@ -86,7 +86,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN10)
 
 ###############################################################################
 
-HWSS_DCN20 = dcn20_hwseq.o
+HWSS_DCN20 = dcn20_hwseq.o dcn20_init.o
 
 AMD_DAL_HWSS_DCN20 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn20/,$(HWSS_DCN20))
 
@@ -94,7 +94,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN20)
 
 ###############################################################################
 
-HWSS_DCN201 = dcn201_hwseq.o
+HWSS_DCN201 = dcn201_hwseq.o dcn201_init.o
 
 AMD_DAL_HWSS_DCN201 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn201/,$(HWSS_DCN201))
 
@@ -102,7 +102,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN201)
 
 ###############################################################################
 
-HWSS_DCN21 = dcn21_hwseq.o
+HWSS_DCN21 = dcn21_hwseq.o dcn21_init.o
 
 AMD_DAL_HWSS_DCN21 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn21/,$(HWSS_DCN21))
 
@@ -114,7 +114,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN21)
 
 ###############################################################################
 
-HWSS_DCN30 = dcn30_hwseq.o
+HWSS_DCN30 = dcn30_hwseq.o dcn30_init.o
 
 AMD_DAL_HWSS_DCN30 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn30/,$(HWSS_DCN30))
 
@@ -122,7 +122,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN30)
 
 ###############################################################################
 
-HWSS_DCN301 = dcn301_hwseq.o
+HWSS_DCN301 = dcn301_hwseq.o dcn301_init.o
 
 AMD_DAL_HWSS_DCN301 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn301/,$(HWSS_DCN301))
 
@@ -130,15 +130,17 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN301)
 
 ###############################################################################
 
-HWSS_DCN302 = dcn302_hwseq.o
+HWSS_DCN302 = dcn302_hwseq.o dcn302_init.o
 
 AMD_DAL_HWSS_DCN302 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn302/,$(HWSS_DCN302))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN302)
 
+
+
 ###############################################################################
 
-HWSS_DCN303 = dcn303_hwseq.o
+HWSS_DCN303 = dcn303_hwseq.o dcn303_init.o
 
 AMD_DAL_HWSS_DCN303 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn303/,$(HWSS_DCN303))
 
@@ -146,7 +148,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN303)
 
 ###############################################################################
 
-HWSS_DCN31 = dcn31_hwseq.o
+HWSS_DCN31 = dcn31_hwseq.o dcn31_init.o
 
 AMD_DAL_HWSS_DCN31 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn31/,$(HWSS_DCN31))
 
@@ -154,7 +156,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN31)
 
 ###############################################################################
 
-HWSS_DCN314 = dcn314_hwseq.o
+HWSS_DCN314 = dcn314_hwseq.o dcn314_init.o
 
 AMD_DAL_HWSS_DCN314 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn314/,$(HWSS_DCN314))
 
@@ -162,7 +164,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN314)
 
 ###############################################################################
 
-HWSS_DCN32 = dcn32_hwseq.o
+HWSS_DCN32 = dcn32_hwseq.o dcn32_init.o
 
 AMD_DAL_HWSS_DCN32 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn32/,$(HWSS_DCN32))
 
@@ -170,7 +172,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN32)
 
 ###############################################################################
 
-HWSS_DCN35 = dcn35_hwseq.o
+HWSS_DCN35 = dcn35_hwseq.o dcn35_init.o
 
 AMD_DAL_HWSS_DCN35 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn35/,$(HWSS_DCN35))
 
@@ -180,4 +182,4 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN35)
 
 ###############################################################################
 
-endif
-\ No newline at end of file
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h
index 44b4df6469d1..52f045cfd52a 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h
@@ -682,6 +682,7 @@ struct dce_hwseq_registers {
 	uint32_t DCHUBBUB_ARB_HOSTVM_CNTL;
 	uint32_t HPO_TOP_HW_CONTROL;
 	uint32_t DMU_CLK_CNTL;
+	uint32_t DCCG_GATE_DISABLE_CNTL4;
 	uint32_t DCCG_GATE_DISABLE_CNTL5;
 };
  /* set field name */
@@ -1199,7 +1200,19 @@ struct dce_hwseq_registers {
 	type PHYBSYMCLK_ROOT_GATE_DISABLE;\
 	type PHYCSYMCLK_ROOT_GATE_DISABLE;\
 	type PHYDSYMCLK_ROOT_GATE_DISABLE;\
-	type PHYESYMCLK_ROOT_GATE_DISABLE;
+	type PHYESYMCLK_ROOT_GATE_DISABLE;\
+	type DTBCLK_P0_GATE_DISABLE;\
+	type DTBCLK_P1_GATE_DISABLE;\
+	type DTBCLK_P2_GATE_DISABLE;\
+	type DTBCLK_P3_GATE_DISABLE;\
+	type DPSTREAMCLK0_GATE_DISABLE;\
+	type DPSTREAMCLK1_GATE_DISABLE;\
+	type DPSTREAMCLK2_GATE_DISABLE;\
+	type DPSTREAMCLK3_GATE_DISABLE;\
+	type DPIASYMCLK0_GATE_DISABLE;\
+	type DPIASYMCLK1_GATE_DISABLE;\
+	type DPIASYMCLK2_GATE_DISABLE;\
+	type DPIASYMCLK3_GATE_DISABLE;
 
 struct dce_hwseq_shift {
 	HWSEQ_REG_FIELD_LIST(uint8_t)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 960a55e06375..fb328cd06cea 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -55,6 +55,7 @@
 #include "audio.h"
 #include "reg_helper.h"
 #include "panel_cntl.h"
+#include "dc_state_priv.h"
 #include "dpcd_defs.h"
 /* include DCE11 register header files */
 #include "dce/dce_11_0_d.h"
@@ -790,7 +791,7 @@ void dce110_edp_power_control(
 	struct dc_context *ctx = link->ctx;
 	struct bp_transmitter_control cntl = { 0 };
 	enum bp_result bp_result;
-	uint8_t panel_instance;
+	uint8_t pwrseq_instance;
 
 
 	if (dal_graphics_object_id_get_connector_id(link->link_enc->connector)
@@ -873,7 +874,7 @@ void dce110_edp_power_control(
 		cntl.coherent = false;
 		cntl.lanes_number = LANE_COUNT_FOUR;
 		cntl.hpd_sel = link->link_enc->hpd_source;
-		panel_instance = link->panel_cntl->inst;
+		pwrseq_instance = link->panel_cntl->pwrseq_inst;
 
 		if (ctx->dc->ctx->dmub_srv &&
 				ctx->dc->debug.dmub_command_table) {
@@ -881,11 +882,11 @@ void dce110_edp_power_control(
 			if (cntl.action == TRANSMITTER_CONTROL_POWER_ON) {
 				bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
 						LVTMA_CONTROL_POWER_ON,
-						panel_instance, link->link_powered_externally);
+						pwrseq_instance, link->link_powered_externally);
 			} else {
 				bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
 						LVTMA_CONTROL_POWER_OFF,
-						panel_instance, link->link_powered_externally);
+						pwrseq_instance, link->link_powered_externally);
 			}
 		}
 
@@ -956,7 +957,7 @@ void dce110_edp_backlight_control(
 {
 	struct dc_context *ctx = link->ctx;
 	struct bp_transmitter_control cntl = { 0 };
-	uint8_t panel_instance;
+	uint8_t pwrseq_instance;
 	unsigned int pre_T11_delay = OLED_PRE_T11_DELAY;
 	unsigned int post_T7_delay = OLED_POST_T7_DELAY;
 
@@ -1009,7 +1010,7 @@ void dce110_edp_backlight_control(
 	 */
 	/* dc_service_sleep_in_milliseconds(50); */
 		/*edp 1.2*/
-	panel_instance = link->panel_cntl->inst;
+	pwrseq_instance = link->panel_cntl->pwrseq_inst;
 
 	if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) {
 		if (!link->dc->config.edp_no_power_sequencing)
@@ -1034,11 +1035,11 @@ void dce110_edp_backlight_control(
 		if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON)
 			ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
 					LVTMA_CONTROL_LCD_BLON,
-					panel_instance, link->link_powered_externally);
+					pwrseq_instance, link->link_powered_externally);
 		else
 			ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
 					LVTMA_CONTROL_LCD_BLOFF,
-					panel_instance, link->link_powered_externally);
+					pwrseq_instance, link->link_powered_externally);
 	}
 
 	link_transmitter_control(ctx->dc_bios, &cntl);
@@ -1596,7 +1597,7 @@ static enum dc_status apply_single_controller_ctx_to_hw(
 	 * is constructed with the same sink). Make sure not to override
 	 * and link programming on the main.
 	 */
-	if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+	if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
 		pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false;
 		pipe_ctx->stream->link->replay_settings.replay_feature_enabled = false;
 	}
@@ -1684,7 +1685,7 @@ static void disable_vga_and_power_gate_all_controllers(
 				true);
 
 		dc->current_state->res_ctx.pipe_ctx[i].pipe_idx = i;
-		dc->hwss.disable_plane(dc,
+		dc->hwss.disable_plane(dc, dc->current_state,
 			&dc->current_state->res_ctx.pipe_ctx[i]);
 	}
 }
@@ -2124,7 +2125,8 @@ static void dce110_reset_hw_ctx_wrap(
 				BREAK_TO_DEBUGGER();
 			}
 			pipe_ctx_old->stream_res.tg->funcs->disable_crtc(pipe_ctx_old->stream_res.tg);
-			pipe_ctx_old->stream->link->phy_state.symclk_ref_cnts.otg = 0;
+			if (dc_is_hdmi_tmds_signal(pipe_ctx_old->stream->signal))
+				pipe_ctx_old->stream->link->phy_state.symclk_ref_cnts.otg = 0;
 			pipe_ctx_old->plane_res.mi->funcs->free_mem_input(
 					pipe_ctx_old->plane_res.mi, dc->current_state->stream_count);
 
@@ -2133,7 +2135,7 @@ static void dce110_reset_hw_ctx_wrap(
 										old_clk))
 				old_clk->funcs->cs_power_down(old_clk);
 
-			dc->hwss.disable_plane(dc, pipe_ctx_old);
+			dc->hwss.disable_plane(dc, dc->current_state, pipe_ctx_old);
 
 			pipe_ctx_old->stream = NULL;
 		}
@@ -2497,6 +2499,7 @@ static bool wait_for_reset_trigger_to_occur(
 /* Enable timing synchronization for a group of Timing Generators. */
 static void dce110_enable_timing_synchronization(
 		struct dc *dc,
+		struct dc_state *state,
 		int group_index,
 		int group_size,
 		struct pipe_ctx *grouped_pipes[])
@@ -2590,6 +2593,7 @@ static void init_hw(struct dc *dc)
 	struct dmcu *dmcu;
 	struct dce_hwseq *hws = dc->hwseq;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 
 	bp = dc->ctx->dc_bios;
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -2639,13 +2643,15 @@ static void init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 
 	abm = dc->res_pool->abm;
 	if (abm != NULL)
-		abm->funcs->abm_init(abm, backlight);
+		abm->funcs->abm_init(abm, backlight, user_level);
 
 	dmcu = dc->res_pool->dmcu;
 	if (dmcu != NULL && abm != NULL)
@@ -2842,7 +2848,7 @@ static void dce110_post_unlock_program_front_end(
 {
 }
 
-static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx)
+static void dce110_power_down_fe(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
 	int fe_idx = pipe_ctx->plane_res.mi ?
@@ -3115,7 +3121,8 @@ void dce110_disable_link_output(struct dc_link *link,
 	struct dmcu *dmcu = dc->res_pool->dmcu;
 
 	if (signal == SIGNAL_TYPE_EDP &&
-			link->dc->hwss.edp_backlight_control)
+			link->dc->hwss.edp_backlight_control &&
+			!link->skip_implict_edp_power_control)
 		link->dc->hwss.edp_backlight_control(link, false);
 	else if (dmcu != NULL && dmcu->funcs->lock_phy)
 		dmcu->funcs->lock_phy(dmcu);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 2b8b8366538e..51dd2ae09b2a 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -56,6 +56,7 @@
 #include "dc_trace.h"
 #include "dce/dmub_outbox.h"
 #include "link.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER \
 	dc_logger
@@ -115,7 +116,7 @@ void dcn10_lock_all_pipes(struct dc *dc,
 		    !pipe_ctx->stream ||
 		    (!pipe_ctx->plane_state && !old_pipe_ctx->plane_state) ||
 		    !tg->funcs->is_tg_enabled(tg) ||
-			pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM)
+			dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM)
 			continue;
 
 		if (lock)
@@ -1057,7 +1058,8 @@ static void dcn10_reset_back_end_for_pipe(
 		if (pipe_ctx->stream_res.tg->funcs->set_drr)
 			pipe_ctx->stream_res.tg->funcs->set_drr(
 					pipe_ctx->stream_res.tg, NULL);
-		pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
+		if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+			pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
@@ -1180,7 +1182,9 @@ void dcn10_verify_allow_pstate_change_high(struct dc *dc)
 }
 
 /* trigger HW to start disconnect plane from stream on the next vsync */
-void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn10_plane_atomic_disconnect(struct dc *dc,
+		struct dc_state *state,
+		struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
@@ -1200,7 +1204,7 @@ void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	mpc->funcs->remove_mpcc(mpc, mpc_tree_params, mpcc_to_remove);
 	// Phantom pipes have OTG disabled by default, so MPCC_STATUS will never assert idle,
 	// so don't wait for MPCC_IDLE in the programming sequence
-	if (opp != NULL && !pipe_ctx->plane_state->is_phantom)
+	if (opp != NULL && dc_state_get_pipe_subvp_type(state, pipe_ctx) != SUBVP_PHANTOM)
 		opp->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 
 	dc->optimized_required = true;
@@ -1290,7 +1294,7 @@ void dcn10_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	pipe_ctx->plane_state = NULL;
 }
 
-void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn10_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
 	DC_LOGGER_INIT(dc->ctx->logger);
@@ -1416,12 +1420,12 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
 		dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
 
-		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
 
 		if (tg->funcs->is_tg_enabled(tg))
 			tg->funcs->unlock(tg);
 
-		dc->hwss.disable_plane(dc, pipe_ctx);
+		dc->hwss.disable_plane(dc, context, pipe_ctx);
 
 		pipe_ctx->stream_res.tg = NULL;
 		pipe_ctx->plane_res.hubp = NULL;
@@ -1486,6 +1490,7 @@ void dcn10_init_hw(struct dc *dc)
 	struct dc_bios *dcb = dc->ctx->dc_bios;
 	struct resource_pool *res_pool = dc->res_pool;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 	bool   is_optimized_init_done = false;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
@@ -1583,12 +1588,14 @@ void dcn10_init_hw(struct dc *dc)
 		for (i = 0; i < dc->link_count; i++) {
 			struct dc_link *link = dc->links[i];
 
-			if (link->panel_cntl)
+			if (link->panel_cntl) {
 				backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+				user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+			}
 		}
 
 		if (abm != NULL)
-			abm->funcs->abm_init(abm, backlight);
+			abm->funcs->abm_init(abm, backlight, user_level);
 
 		if (dmcu != NULL && !dmcu->auto_load_dmcu)
 			dmcu->funcs->dmcu_init(dmcu);
@@ -2262,6 +2269,7 @@ void dcn10_enable_vblanks_synchronization(
 
 void dcn10_enable_timing_synchronization(
 	struct dc *dc,
+	struct dc_state *state,
 	int group_index,
 	int group_size,
 	struct pipe_ctx *grouped_pipes[])
@@ -2276,7 +2284,7 @@ void dcn10_enable_timing_synchronization(
 	DC_SYNC_INFO("Setting up OTG reset trigger\n");
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		opp = grouped_pipes[i]->stream_res.opp;
@@ -2296,14 +2304,14 @@ void dcn10_enable_timing_synchronization(
 		if (grouped_pipes[i]->stream == NULL)
 			continue;
 
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		grouped_pipes[i]->stream->vblank_synchronized = false;
 	}
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		grouped_pipes[i]->stream_res.tg->funcs->enable_reset_trigger(
@@ -2317,11 +2325,11 @@ void dcn10_enable_timing_synchronization(
 	 * synchronized. Look at last pipe programmed to reset.
 	 */
 
-	if (grouped_pipes[1]->stream && grouped_pipes[1]->stream->mall_stream_config.type != SUBVP_PHANTOM)
+	if (grouped_pipes[1]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[1]) != SUBVP_PHANTOM)
 		wait_for_reset_trigger_to_occur(dc_ctx, grouped_pipes[1]->stream_res.tg);
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		grouped_pipes[i]->stream_res.tg->funcs->disable_reset_trigger(
@@ -2329,7 +2337,7 @@ void dcn10_enable_timing_synchronization(
 	}
 
 	for (i = 1; i < group_size; i++) {
-		if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+		if (dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
 			continue;
 
 		opp = grouped_pipes[i]->stream_res.opp;
@@ -3021,7 +3029,7 @@ void dcn10_post_unlock_program_front_end(
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
 		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
-			dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+			dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
 		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) {
@@ -3068,7 +3076,7 @@ void dcn10_prepare_bandwidth(
 			context,
 			false);
 
-	dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub,
+	dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub,
 			&context->bw_ctx.bw.dcn.watermarks,
 			dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
 			true);
@@ -3417,7 +3425,8 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
 		.h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz,
 		.v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert,
 		.rotation = pipe_ctx->plane_state->rotation,
-		.mirror = pipe_ctx->plane_state->horizontal_mirror
+		.mirror = pipe_ctx->plane_state->horizontal_mirror,
+		.stream = pipe_ctx->stream,
 	};
 	bool pipe_split_on = false;
 	bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) ||
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h
index ef6d56da417c..bc5dd68a2408 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h
@@ -75,7 +75,7 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx);
 void dcn10_reset_hw_ctx_wrap(
 		struct dc *dc,
 		struct dc_state *context);
-void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 void dcn10_lock_all_pipes(
 		struct dc *dc,
 		struct dc_state *context,
@@ -108,13 +108,16 @@ void dcn10_power_down_on_boot(struct dc *dc);
 enum dc_status dce110_apply_ctx_to_hw(
 		struct dc *dc,
 		struct dc_state *context);
-void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_plane_atomic_disconnect(struct dc *dc,
+		struct dc_state *state,
+		struct pipe_ctx *pipe_ctx);
 void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data);
 void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx);
 void dce110_power_down(struct dc *dc);
 void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context);
 void dcn10_enable_timing_synchronization(
 		struct dc *dc,
+		struct dc_state *state,
 		int group_index,
 		int group_size,
 		struct pipe_ctx *grouped_pipes[]);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c
index a5bdac79a744..a5bdac79a744 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h
index 8c6fd7b844a4..8c6fd7b844a4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index 608221b0dd5d..bc71a9b058fe 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -55,6 +55,7 @@
 #include "inc/link_enc_cfg.h"
 #include "link_hwss.h"
 #include "link.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER \
 	dc_logger
@@ -623,9 +624,9 @@ void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 }
 
 
-void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn20_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
 {
-	bool is_phantom = pipe_ctx->plane_state && pipe_ctx->plane_state->is_phantom;
+	bool is_phantom = dc_state_get_pipe_subvp_type(state, pipe_ctx) == SUBVP_PHANTOM;
 	struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL;
 
 	DC_LOGGER_INIT(dc->ctx->logger);
@@ -847,7 +848,7 @@ enum dc_status dcn20_enable_stream_timing(
 	/* TODO enable stream if timing changed */
 	/* TODO unblank stream if DP */
 
-	if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+	if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) {
 		if (pipe_ctx->stream_res.tg && pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable)
 			pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable(pipe_ctx->stream_res.tg);
 	}
@@ -1368,8 +1369,14 @@ void dcn20_pipe_control_lock(
 	}
 }
 
-static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe)
+static void dcn20_detect_pipe_changes(struct dc_state *old_state,
+		struct dc_state *new_state,
+		struct pipe_ctx *old_pipe,
+		struct pipe_ctx *new_pipe)
 {
+	bool old_is_phantom = dc_state_get_pipe_subvp_type(old_state, old_pipe) == SUBVP_PHANTOM;
+	bool new_is_phantom = dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM;
+
 	new_pipe->update_flags.raw = 0;
 
 	/* If non-phantom pipe is being transitioned to a phantom pipe,
@@ -1379,8 +1386,8 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
 	 * be different). The post_unlock sequence will set the correct
 	 * update flags to enable the phantom pipe.
 	 */
-	if (old_pipe->plane_state && !old_pipe->plane_state->is_phantom &&
-			new_pipe->plane_state && new_pipe->plane_state->is_phantom) {
+	if (old_pipe->plane_state && !old_is_phantom &&
+			new_pipe->plane_state && new_is_phantom) {
 		new_pipe->update_flags.bits.disable = 1;
 		return;
 	}
@@ -1400,6 +1407,10 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
 		new_pipe->update_flags.bits.scaler = 1;
 		new_pipe->update_flags.bits.viewport = 1;
 		new_pipe->update_flags.bits.det_size = 1;
+		if (new_pipe->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE &&
+				new_pipe->stream_res.test_pattern_params.width != 0 &&
+				new_pipe->stream_res.test_pattern_params.height != 0)
+			new_pipe->update_flags.bits.test_pattern_changed = 1;
 		if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) {
 			new_pipe->update_flags.bits.odm = 1;
 			new_pipe->update_flags.bits.global_sync = 1;
@@ -1412,14 +1423,14 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
 	 * The remove-add sequence of the phantom pipe always results in the pipe
 	 * being blanked in enable_stream_timing (DPG).
 	 */
-	if (new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)
+	if (new_pipe->stream && dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM)
 		new_pipe->update_flags.bits.enable = 1;
 
 	/* Phantom pipes are effectively disabled, if the pipe was previously phantom
 	 * we have to enable
 	 */
-	if (old_pipe->plane_state && old_pipe->plane_state->is_phantom &&
-			new_pipe->plane_state && !new_pipe->plane_state->is_phantom)
+	if (old_pipe->plane_state && old_is_phantom &&
+			new_pipe->plane_state && !new_is_phantom)
 		new_pipe->update_flags.bits.enable = 1;
 
 	if (old_pipe->plane_state && !new_pipe->plane_state) {
@@ -1556,6 +1567,7 @@ static void dcn20_update_dchubp_dpp(
 	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
 	struct dccg *dccg = dc->res_pool->dccg;
 	bool viewport_changed = false;
+	enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe_ctx);
 
 	if (pipe_ctx->update_flags.bits.dppclk)
 		dpp->funcs->dpp_dppclk_control(dpp, false, true);
@@ -1701,7 +1713,7 @@ static void dcn20_update_dchubp_dpp(
 		pipe_ctx->update_flags.bits.plane_changed ||
 		plane_state->update_flags.bits.addr_update) {
 		if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) &&
-				pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) {
+				pipe_mall_type == SUBVP_MAIN) {
 			union block_sequence_params params;
 
 			params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv;
@@ -1715,7 +1727,7 @@ static void dcn20_update_dchubp_dpp(
 	if (pipe_ctx->update_flags.bits.enable)
 		hubp->funcs->set_blank(hubp, false);
 	/* If the stream paired with this plane is phantom, the plane is also phantom */
-	if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM
+	if (pipe_ctx->stream && pipe_mall_type == SUBVP_PHANTOM
 			&& hubp->funcs->phantom_hubp_post_enable)
 		hubp->funcs->phantom_hubp_post_enable(hubp);
 }
@@ -1773,7 +1785,7 @@ static void dcn20_program_pipe(
 				pipe_ctx->pipe_dlg_param.vupdate_offset,
 				pipe_ctx->pipe_dlg_param.vupdate_width);
 
-		if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM)
+		if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)
 			pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
 
 		pipe_ctx->stream_res.tg->funcs->set_vtg_params(
@@ -1870,6 +1882,42 @@ static void dcn20_program_pipe(
 	}
 }
 
+static void update_vmin_vmax_fams(struct dc *dc,
+		struct dc_state *context)
+{
+	uint32_t i;
+	struct drr_params params = {0};
+	bool subvp_in_use = resource_subvp_in_use(dc, context);
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (resource_is_pipe_type(pipe, OTG_MASTER) &&
+				((subvp_in_use && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM &&
+				pipe->stream->allow_freesync) || (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && pipe->stream->fpo_in_use))) {
+			if (!pipe->stream->vrr_active_variable && !pipe->stream->vrr_active_fixed) {
+				struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg;
+
+				/* DRR should be configured already if we're in active variable
+				 * or active fixed, so only program if we're not in this state
+				 */
+				params.vertical_total_min = pipe->stream->timing.v_total;
+				params.vertical_total_max = pipe->stream->timing.v_total;
+				tg->funcs->set_drr(tg, &params);
+			}
+		} else {
+			if (resource_is_pipe_type(pipe, OTG_MASTER) &&
+					!pipe->stream->vrr_active_variable &&
+					!pipe->stream->vrr_active_fixed) {
+				struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg;
+				params.vertical_total_min = 0;
+				params.vertical_total_max = 0;
+				tg->funcs->set_drr(tg, &params);
+			}
+		}
+	}
+}
+
 void dcn20_program_front_end_for_ctx(
 		struct dc *dc,
 		struct dc_state *context)
@@ -1877,6 +1925,8 @@ void dcn20_program_front_end_for_ctx(
 	int i;
 	struct dce_hwseq *hws = dc->hwseq;
 	DC_LOGGER_INIT(dc->ctx->logger);
+	unsigned int prev_hubp_count = 0;
+	unsigned int hubp_count = 0;
 
 	if (resource_is_pipe_topology_changed(dc->current_state, context))
 		resource_log_pipe_topology_update(dc, context);
@@ -1894,9 +1944,23 @@ void dcn20_program_front_end_for_ctx(
 		}
 	}
 
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		if (dc->current_state->res_ctx.pipe_ctx[i].plane_state)
+			prev_hubp_count++;
+		if (context->res_ctx.pipe_ctx[i].plane_state)
+			hubp_count++;
+	}
+
+	if (prev_hubp_count == 0 && hubp_count > 0) {
+		if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+			dc->res_pool->hubbub->funcs->force_pstate_change_control(
+					dc->res_pool->hubbub, true, false);
+		udelay(500);
+	}
+
 	/* Set pipe update flags and lock pipes */
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
-		dcn20_detect_pipe_changes(&dc->current_state->res_ctx.pipe_ctx[i],
+		dcn20_detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i],
 				&context->res_ctx.pipe_ctx[i]);
 
 	/* When disabling phantom pipes, turn on phantom OTG first (so we can get double
@@ -1906,15 +1970,16 @@ void dcn20_program_front_end_for_ctx(
 		struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream;
 
 		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream &&
-			dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				dc_state_get_pipe_subvp_type(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) {
 			struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg;
 
 			if (tg->funcs->enable_crtc) {
 				if (dc->hwss.blank_phantom) {
 					int main_pipe_width, main_pipe_height;
+					struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(dc->current_state, dc->current_state->res_ctx.pipe_ctx[i].stream);
 
-					main_pipe_width = dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.paired_stream->dst.width;
-					main_pipe_height = dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.paired_stream->dst.height;
+					main_pipe_width = phantom_stream->dst.width;
+					main_pipe_height = phantom_stream->dst.height;
 					dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height);
 				}
 				tg->funcs->enable_crtc(tg);
@@ -1929,6 +1994,7 @@ void dcn20_program_front_end_for_ctx(
 				&& context->res_ctx.pipe_ctx[i].stream)
 			hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true);
 
+	update_vmin_vmax_fams(dc, context);
 
 	/* Disconnect mpcc */
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
@@ -1943,9 +2009,9 @@ void dcn20_program_front_end_for_ctx(
 			 * DET allocation.
 			 */
 			if (hubbub->funcs->program_det_size && (context->res_ctx.pipe_ctx[i].update_flags.bits.disable ||
-					(context->res_ctx.pipe_ctx[i].plane_state && context->res_ctx.pipe_ctx[i].plane_state->is_phantom)))
+					(context->res_ctx.pipe_ctx[i].plane_state && dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM)))
 				hubbub->funcs->program_det_size(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
-			hws->funcs.plane_atomic_disconnect(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+			hws->funcs.plane_atomic_disconnect(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
 			DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx);
 		}
 
@@ -1968,7 +2034,7 @@ void dcn20_program_front_end_for_ctx(
 					 * but the MPO still exists until the double buffered update of the main pipe so we
 					 * will get a frame of underflow if the phantom pipe is programmed here.
 					 */
-					if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_PHANTOM)
+					if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM)
 						dcn20_program_pipe(dc, pipe, context);
 				}
 
@@ -2018,7 +2084,7 @@ void dcn20_post_unlock_program_front_end(
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
 		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
-			dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+			dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
 
 	/*
 	 * If we are enabling a pipe, we need to wait for pending clear as this is a critical
@@ -2030,7 +2096,7 @@ void dcn20_post_unlock_program_front_end(
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 		// Don't check flip pending on phantom pipes
 		if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable &&
-				pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+				dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
 			struct hubp *hubp = pipe->plane_res.hubp;
 			int j = 0;
 			for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us
@@ -2039,6 +2105,10 @@ void dcn20_post_unlock_program_front_end(
 		}
 	}
 
+	if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+		dc->res_pool->hubbub->funcs->force_pstate_change_control(
+				dc->res_pool->hubbub, false, false);
+
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
@@ -2049,7 +2119,7 @@ void dcn20_post_unlock_program_front_end(
 			 * programming sequence).
 			 */
 			while (pipe) {
-				if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 					/* When turning on the phantom pipe we want to run through the
 					 * entire enable sequence, so apply all the "enable" flags.
 					 */
@@ -2119,17 +2189,17 @@ void dcn20_prepare_bandwidth(
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
 		// At optimize don't restore the original watermark value
-		if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) {
 			context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U;
 			break;
 		}
 	}
 
 	/* program dchubbub watermarks:
-	 * For assigning wm_optimized_required, use |= operator since we don't want
+	 * For assigning optimized_required, use |= operator since we don't want
 	 * to clear the value if the optimize has not happened yet
 	 */
-	dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub,
+	dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub,
 					&context->bw_ctx.bw.dcn.watermarks,
 					dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
 					false);
@@ -2142,10 +2212,10 @@ void dcn20_prepare_bandwidth(
 	if (hubbub->funcs->program_compbuf_size) {
 		if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) {
 			compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes;
-			dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
+			dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
 		} else {
 			compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb;
-			dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
+			dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
 		}
 
 		hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false);
@@ -2163,7 +2233,7 @@ void dcn20_optimize_bandwidth(
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
 		// At optimize don't need  to restore the original watermark value
-		if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) {
 			context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U;
 			break;
 		}
@@ -2197,7 +2267,8 @@ void dcn20_optimize_bandwidth(
 			dc->clk_mgr,
 			context,
 			true);
-	if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW) {
+	if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW &&
+		!dc->debug.disable_extblankadj) {
 		for (i = 0; i < dc->res_pool->pipe_count; ++i) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
@@ -2590,7 +2661,8 @@ static void dcn20_reset_back_end_for_pipe(
 		 * the case where the same symclk is shared across multiple otg
 		 * instances
 		 */
-		link->phy_state.symclk_ref_cnts.otg = 0;
+		if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+			link->phy_state.symclk_ref_cnts.otg = 0;
 		if (link->phy_state.symclk_state == SYMCLK_ON_TX_OFF) {
 			link_hwss->disable_link_output(link,
 					&pipe_ctx->link_res, pipe_ctx->stream->signal);
@@ -2923,7 +2995,7 @@ void dcn20_fpga_init_hw(struct dc *dc)
 		dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
 		/*to do*/
-		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
 	}
 
 	/* initialize DWB pointer to MCIF_WB */
@@ -2940,7 +3012,7 @@ void dcn20_fpga_init_hw(struct dc *dc)
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-		dc->hwss.disable_plane(dc, pipe_ctx);
+		dc->hwss.disable_plane(dc, context, pipe_ctx);
 
 		pipe_ctx->stream_res.tg = NULL;
 		pipe_ctx->plane_res.hubp = NULL;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
index ab02e4e9c8c2..b94c85340abf 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
@@ -52,7 +52,7 @@ void dcn20_program_output_csc(struct dc *dc,
 void dcn20_enable_stream(struct pipe_ctx *pipe_ctx);
 void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx,
 		struct dc_link_settings *link_settings);
-void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 void dcn20_disable_pixel_data(
 		struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c
index 884e3e323338..884e3e323338 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h
index 12277797cd71..12277797cd71 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
index d3fe6092f50e..d5769f38874f 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
@@ -320,7 +320,7 @@ void dcn201_init_hw(struct dc *dc)
 		res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = res_pool->opps[i];
 		/*To do: number of MPCC != number of opp*/
-		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
 	}
 
 	/* initialize DWB pointer to MCIF_WB */
@@ -337,7 +337,7 @@ void dcn201_init_hw(struct dc *dc)
 	for (i = 0; i < res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-		dc->hwss.disable_plane(dc, pipe_ctx);
+		dc->hwss.disable_plane(dc, context, pipe_ctx);
 
 		pipe_ctx->stream_res.tg = NULL;
 		pipe_ctx->plane_res.hubp = NULL;
@@ -369,7 +369,9 @@ void dcn201_init_hw(struct dc *dc)
 }
 
 /* trigger HW to start disconnect plane from stream on the next vsync */
-void dcn201_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn201_plane_atomic_disconnect(struct dc *dc,
+		struct dc_state *state,
+		struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h
index 26cd62be6418..6a50a9894be6 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h
@@ -33,7 +33,7 @@ void dcn201_init_hw(struct dc *dc);
 void dcn201_unblank_stream(struct pipe_ctx *pipe_ctx,
 		struct dc_link_settings *link_settings);
 void dcn201_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx);
-void dcn201_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn201_plane_atomic_disconnect(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 void dcn201_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx);
 void dcn201_set_cursor_attribute(struct pipe_ctx *pipe_ctx);
 void dcn201_pipe_control_lock(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c
index a13bf6c9386e..a13bf6c9386e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h
index 1168887b033d..1168887b033d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
index 467812cf3368..8e88dcaf88f5 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
@@ -137,7 +137,8 @@ void dcn21_PLAT_58856_wa(struct dc_state *context, struct pipe_ctx *pipe_ctx)
 	pipe_ctx->stream->dpms_off = true;
 }
 
-static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst)
+static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst,
+		uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst)
 {
 	union dmub_rb_cmd cmd;
 	struct dc_context *dc = abm->ctx;
@@ -147,12 +148,13 @@ static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t optio
 	cmd.abm_set_pipe.header.type = DMUB_CMD__ABM;
 	cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE;
 	cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst;
+	cmd.abm_set_pipe.abm_set_pipe_data.pwrseq_inst = pwrseq_inst;
 	cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option;
 	cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst;
 	cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary;
 	cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -171,7 +173,7 @@ static void dmub_abm_set_backlight(struct dc_context *dc, uint32_t backlight_pwm
 	cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst);
 	cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data);
 
-	dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
@@ -179,7 +181,6 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
 	struct abm *abm = pipe_ctx->stream_res.abm;
 	uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
 	struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
-
 	struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu;
 
 	if (dmcu) {
@@ -190,9 +191,13 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
 	if (abm && panel_cntl) {
 		if (abm->funcs && abm->funcs->set_pipe_ex) {
 			abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE,
-			panel_cntl->inst);
+					panel_cntl->inst, panel_cntl->pwrseq_inst);
 		} else {
-			dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE, panel_cntl->inst);
+				dmub_abm_set_pipe(abm,
+						otg_inst,
+						SET_ABM_PIPE_IMMEDIATELY_DISABLE,
+						panel_cntl->inst,
+						panel_cntl->pwrseq_inst);
 		}
 		panel_cntl->funcs->store_backlight_level(panel_cntl);
 	}
@@ -212,9 +217,16 @@ void dcn21_set_pipe(struct pipe_ctx *pipe_ctx)
 
 	if (abm && panel_cntl) {
 		if (abm->funcs && abm->funcs->set_pipe_ex) {
-			abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
+			abm->funcs->set_pipe_ex(abm,
+					otg_inst,
+					SET_ABM_PIPE_NORMAL,
+					panel_cntl->inst,
+					panel_cntl->pwrseq_inst);
 		} else {
-			dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
+				dmub_abm_set_pipe(abm, otg_inst,
+						SET_ABM_PIPE_NORMAL,
+						panel_cntl->inst,
+						panel_cntl->pwrseq_inst);
 		}
 	}
 }
@@ -237,9 +249,17 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx,
 
 		if (abm && panel_cntl) {
 			if (abm->funcs && abm->funcs->set_pipe_ex) {
-				abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
+				abm->funcs->set_pipe_ex(abm,
+						otg_inst,
+						SET_ABM_PIPE_NORMAL,
+						panel_cntl->inst,
+						panel_cntl->pwrseq_inst);
 			} else {
-				dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
+					dmub_abm_set_pipe(abm,
+							otg_inst,
+							SET_ABM_PIPE_NORMAL,
+							panel_cntl->inst,
+							panel_cntl->pwrseq_inst);
 			}
 		}
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c
index 18249c6b6d81..18249c6b6d81 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h
index 3ed24292648a..3ed24292648a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
index d71faf2ecd41..c34c13e1e0a4 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
@@ -51,7 +51,7 @@
 #include "dcn20/dcn20_hwseq.h"
 #include "dcn30/dcn30_resource.h"
 #include "link.h"
-
+#include "dc_state_priv.h"
 
 
 
@@ -367,6 +367,10 @@ void dcn30_enable_writeback(
 	DC_LOG_DWB("%s dwb_pipe_inst = %d, mpcc_inst = %d",\
 		__func__, wb_info->dwb_pipe_inst,\
 		wb_info->mpcc_inst);
+
+	/* Warmup interface */
+	dcn30_mmhubbub_warmup(dc, 1, wb_info);
+
 	/* Update writeback pipe */
 	dcn30_set_writeback(dc, wb_info, context);
 
@@ -472,6 +476,7 @@ void dcn30_init_hw(struct dc *dc)
 	int i;
 	int edp_num;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
 		dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
@@ -608,13 +613,15 @@ void dcn30_init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (abms[i] != NULL)
-			abms[i]->funcs->abm_init(abms[i], backlight);
+			abms[i]->funcs->abm_init(abms[i], backlight, user_level);
 	}
 
 	/* power AFMT HDMI memory TODO: may move to dis/en output save power*/
@@ -750,7 +757,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 				cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_NO_DF_REQ;
 				cmd.mall.header.payload_bytes = sizeof(cmd.mall) - sizeof(cmd.mall.header);
 
-				dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+				dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 				return true;
 			}
@@ -872,7 +879,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 					cmd.mall.cursor_height = cursor_attr.height;
 					cmd.mall.cursor_pitch = cursor_attr.pitch;
 
-					dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+					dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 					/* Use copied cursor, and it's okay to not switch back */
 					cursor_attr.address.quad_part = cmd.mall.cursor_copy_dst.quad_part;
@@ -888,7 +895,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 				cmd.mall.tmr_scale = tmr_scale;
 				cmd.mall.debug_bits = dc->debug.mall_error_as_fatal;
 
-				dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+				dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 				return true;
 			}
@@ -905,7 +912,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 	cmd.mall.header.payload_bytes =
 		sizeof(cmd.mall) - sizeof(cmd.mall.header);
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -962,7 +969,7 @@ void dcn30_hardware_release(struct dc *dc)
 		if (!pipe->stream)
 			continue;
 
-		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+		if (dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_MAIN) {
 			subvp_in_use = true;
 			break;
 		}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
index 9894caedffed..9894caedffed 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h
index c280ff90bfa3..c280ff90bfa3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
index 6477009ce065..6477009ce065 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h
index 0bca48ccbfa2..0bca48ccbfa2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c
index 637f9514d37b..637f9514d37b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h
index 899587b93aa1..899587b93aa1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c
index edb4d68b8187..edb4d68b8187 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h
index 4949981126d7..4949981126d7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
index 97798cee876e..7423880fabb6 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
@@ -96,7 +96,8 @@ static void enable_memory_low_power(struct dc *dc)
 	if (dc->debug.enable_mem_low_power.bits.vpg && dc->res_pool->stream_enc[0]->vpg->funcs->vpg_powerdown) {
 		// Power down VPGs
 		for (i = 0; i < dc->res_pool->stream_enc_count; i++)
-			dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg);
+			if (dc->res_pool->stream_enc[i]->vpg)
+				dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg);
 #if defined(CONFIG_DRM_AMD_DC_FP)
 		for (i = 0; i < dc->res_pool->hpo_dp_stream_enc_count; i++)
 			dc->res_pool->hpo_dp_stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->hpo_dp_stream_enc[i]->vpg);
@@ -112,6 +113,7 @@ void dcn31_init_hw(struct dc *dc)
 	struct dc_bios *dcb = dc->ctx->dc_bios;
 	struct resource_pool *res_pool = dc->res_pool;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 	int i;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
@@ -223,13 +225,15 @@ void dcn31_init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (abms[i] != NULL)
-			abms[i]->funcs->abm_init(abms[i], backlight);
+			abms[i]->funcs->abm_init(abms[i], backlight, user_level);
 	}
 
 	/* power AFMT HDMI memory TODO: may move to dis/en output save power*/
@@ -415,7 +419,7 @@ void dcn31_z10_save_init(struct dc *dc)
 	cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT;
 	cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dcn31_z10_restore(const struct dc *dc)
@@ -433,7 +437,7 @@ void dcn31_z10_restore(const struct dc *dc)
 	cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT;
 	cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_RESTORE;
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 }
 
 void dcn31_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on)
@@ -523,7 +527,8 @@ static void dcn31_reset_back_end_for_pipe(
 	if (pipe_ctx->stream_res.tg->funcs->set_odm_bypass)
 		pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
 				pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
-	pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
+	if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+		pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
 
 	if (pipe_ctx->stream_res.tg->funcs->set_drr)
 		pipe_ctx->stream_res.tg->funcs->set_drr(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
index 669f524bd064..669f524bd064 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h
index a3db08c8bd35..a3db08c8bd35 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
index ccb7e317e86a..ccb7e317e86a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h
index 8f92e66577cf..8f92e66577cf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index 6a65af8c36b9..6c9299c7683d 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -51,6 +51,7 @@
 #include "dcn32/dcn32_resource.h"
 #include "link.h"
 #include "../dcn20/dcn20_hwseq.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -277,7 +278,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
 				cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_DCN_REQ;
 				cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header);
 
-				dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+				dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 				return true;
 			}
@@ -311,7 +312,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
 				cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header);
 				cmd.cab.cab_alloc_ways = (uint8_t)ways;
 
-				dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+				dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
 
 				return true;
 			}
@@ -327,7 +328,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
 	cmd.cab.header.payload_bytes =
 			sizeof(cmd.cab) - sizeof(cmd.cab.header);
 
-	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
 
 	return true;
 }
@@ -348,8 +349,7 @@ void dcn32_commit_subvp_config(struct dc *dc, struct dc_state *context)
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.paired_stream &&
-				pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) {
+		if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) {
 			// There is at least 1 SubVP pipe, so enable SubVP
 			enable_subvp = true;
 			break;
@@ -375,18 +375,20 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc,
 	bool subvp_immediate_flip = false;
 	bool subvp_in_use = false;
 	struct pipe_ctx *pipe;
+	enum mall_stream_type pipe_mall_type = SUBVP_NONE;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		pipe = &context->res_ctx.pipe_ctx[i];
+		pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
 
-		if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+		if (pipe->stream && pipe->plane_state && pipe_mall_type == SUBVP_MAIN) {
 			subvp_in_use = true;
 			break;
 		}
 	}
 
 	if (top_pipe_to_program && top_pipe_to_program->stream && top_pipe_to_program->plane_state) {
-		if (top_pipe_to_program->stream->mall_stream_config.type == SUBVP_MAIN &&
+		if (dc_state_get_pipe_subvp_type(context, top_pipe_to_program) == SUBVP_MAIN &&
 				top_pipe_to_program->plane_state->flip_immediate)
 			subvp_immediate_flip = true;
 	}
@@ -398,7 +400,7 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc,
 		if (!lock) {
 			for (i = 0; i < dc->res_pool->pipe_count; i++) {
 				pipe = &context->res_ctx.pipe_ctx[i];
-				if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN &&
+				if (pipe->stream && pipe->plane_state && pipe_mall_type == SUBVP_MAIN &&
 						should_lock_all_pipes)
 					pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VBLANK);
 			}
@@ -416,14 +418,7 @@ void dcn32_subvp_pipe_control_lock_fast(union block_sequence_params *params)
 {
 	struct dc *dc = params->subvp_pipe_control_lock_fast_params.dc;
 	bool lock = params->subvp_pipe_control_lock_fast_params.lock;
-	struct pipe_ctx *pipe_ctx = params->subvp_pipe_control_lock_fast_params.pipe_ctx;
-	bool subvp_immediate_flip = false;
-
-	if (pipe_ctx && pipe_ctx->stream && pipe_ctx->plane_state) {
-		if (pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN &&
-				pipe_ctx->plane_state->flip_immediate)
-			subvp_immediate_flip = true;
-	}
+	bool subvp_immediate_flip = params->subvp_pipe_control_lock_fast_params.subvp_immediate_flip;
 
 	// Don't need to lock for DRR VSYNC flips -- FW will wait for DRR pending update cleared.
 	if (subvp_immediate_flip) {
@@ -487,8 +482,7 @@ bool dcn32_set_mcm_luts(
 		if (plane_state->blend_tf->type == TF_TYPE_HWPWL)
 			lut_params = &plane_state->blend_tf->pwl;
 		else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) {
-			cm_helper_translate_curve_to_hw_format(plane_state->ctx,
-					plane_state->blend_tf,
+			cm3_helper_translate_curve_to_hw_format(plane_state->blend_tf,
 					&dpp_base->regamma_params, false);
 			lut_params = &dpp_base->regamma_params;
 		}
@@ -503,8 +497,7 @@ bool dcn32_set_mcm_luts(
 		else if (plane_state->in_shaper_func->type == TF_TYPE_DISTRIBUTED_POINTS) {
 			// TODO: dpp_base replace
 			ASSERT(false);
-			cm_helper_translate_curve_to_hw_format(plane_state->ctx,
-					plane_state->in_shaper_func,
+			cm3_helper_translate_curve_to_hw_format(plane_state->in_shaper_func,
 					&dpp_base->shaper_params, true);
 			lut_params = &dpp_base->shaper_params;
 		}
@@ -611,7 +604,7 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context)
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 		struct hubp *hubp = pipe->plane_res.hubp;
 
-		if (!pipe->stream || !(pipe->stream->mall_stream_config.type == SUBVP_MAIN ||
+		if (!pipe->stream || !(dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN ||
 		    pipe->stream->fpo_in_use)) {
 			if (hubp && hubp->funcs->hubp_update_force_pstate_disallow)
 				hubp->funcs->hubp_update_force_pstate_disallow(hubp, false);
@@ -626,7 +619,7 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context)
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 		struct hubp *hubp = pipe->plane_res.hubp;
 
-		if (pipe->stream && (pipe->stream->mall_stream_config.type == SUBVP_MAIN ||
+		if (pipe->stream && (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN ||
 				pipe->stream->fpo_in_use)) {
 			if (hubp && hubp->funcs->hubp_update_force_pstate_disallow)
 				hubp->funcs->hubp_update_force_pstate_disallow(hubp, true);
@@ -673,8 +666,8 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context)
 			if (cursor_size > 16384)
 				cache_cursor = true;
 
-			if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-					hubp->funcs->hubp_update_mall_sel(hubp, 1, false);
+			if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
+				hubp->funcs->hubp_update_mall_sel(hubp, 1, false);
 			} else {
 				// MALL not supported with Stereo3D
 				hubp->funcs->hubp_update_mall_sel(hubp,
@@ -716,9 +709,8 @@ void dcn32_program_mall_pipe_config(struct dc *dc, struct dc_state *context)
 			 *        see if CURSOR_REQ_MODE will be back to 1 for SubVP
 			 *        when it should be 0 for MPO
 			 */
-			if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+			if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
 				hubp->funcs->hubp_prepare_subvp_buffering(hubp, true);
-			}
 		}
 	}
 }
@@ -761,6 +753,7 @@ void dcn32_init_hw(struct dc *dc)
 	int i;
 	int edp_num;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
 		dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
@@ -915,13 +908,15 @@ void dcn32_init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (abms[i] != NULL && abms[i]->funcs != NULL)
-			abms[i]->funcs->abm_init(abms[i], backlight);
+			abms[i]->funcs->abm_init(abms[i], backlight, user_level);
 	}
 
 	/* power AFMT HDMI memory TODO: may move to dis/en output save power*/
@@ -962,6 +957,12 @@ void dcn32_init_hw(struct dc *dc)
 		dc->caps.dmub_caps.subvp_psr = dc->ctx->dmub_srv->dmub->feature_caps.subvp_psr_support;
 		dc->caps.dmub_caps.gecc_enable = dc->ctx->dmub_srv->dmub->feature_caps.gecc_enable;
 		dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch;
+
+		if (dc->ctx->dmub_srv->dmub->fw_version <
+		    DMUB_FW_VERSION(7, 0, 35)) {
+			dc->debug.force_disable_subvp = true;
+			dc->debug.disable_fpo_optimizations = true;
+		}
 	}
 }
 
@@ -991,9 +992,22 @@ static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
 static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 {
 	struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
+	struct dc *dc = pipe_ctx->stream->ctx->dc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct pipe_ctx *odm_pipe;
 	int opp_cnt = 1;
+	struct dccg *dccg = dc->res_pool->dccg;
+	/* It has been found that when DSCCLK is lower than 16Mhz, we will get DCN
+	 * register access hung. When DSCCLk is based on refclk, DSCCLk is always a
+	 * fixed value higher than 16Mhz so the issue doesn't occur. When DSCCLK is
+	 * generated by DTO, DSCCLK would be based on 1/3 dispclk. For small timings
+	 * with DSC such as 480p60Hz, the dispclk could be low enough to trigger
+	 * this problem. We are implementing a workaround here to keep using dscclk
+	 * based on fixed value refclk when timing is smaller than 3x16Mhz (i.e
+	 * 48Mhz) pixel clock to avoid hitting this problem.
+	 */
+	bool should_use_dto_dscclk = (dccg->funcs->set_dto_dscclk != NULL) &&
+			stream->timing.pix_clk_100hz > 480000;
 
 	ASSERT(dsc);
 	for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
@@ -1016,12 +1030,16 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 
 		dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg);
 		dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst);
+		if (should_use_dto_dscclk)
+			dccg->funcs->set_dto_dscclk(dccg, dsc->inst);
 		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
 			struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc;
 
 			ASSERT(odm_dsc);
 			odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg);
 			odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst);
+			if (should_use_dto_dscclk)
+				dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst);
 		}
 		dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt;
 		dsc_cfg.pic_width *= opp_cnt;
@@ -1041,9 +1059,13 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 				OPTC_DSC_DISABLED, 0, 0);
 
 		/* disable DSC block */
+		if (dccg->funcs->set_ref_dscclk)
+			dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst);
 		dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc);
 		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
 			ASSERT(odm_pipe->stream_res.dsc);
+			if (dccg->funcs->set_ref_dscclk)
+				dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst);
 			odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc);
 		}
 	}
@@ -1126,6 +1148,10 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
 		if (!pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe &&
 				current_pipe_ctx->next_odm_pipe->stream_res.dsc) {
 			struct display_stream_compressor *dsc = current_pipe_ctx->next_odm_pipe->stream_res.dsc;
+			struct dccg *dccg = dc->res_pool->dccg;
+
+			if (dccg->funcs->set_ref_dscclk)
+				dccg->funcs->set_ref_dscclk(dccg, dsc->inst);
 			/* disconnect DSC block from stream */
 			dsc->funcs->dsc_disconnect(dsc);
 		}
@@ -1199,7 +1225,7 @@ void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_
 			continue;
 
 		if ((pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))
-			&& pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+			&& dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_PHANTOM) {
 			pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg);
 			reset_sync_context_for_pipe(dc, context, i);
 			otg_disabled[i] = true;
@@ -1350,8 +1376,8 @@ void dcn32_update_phantom_vp_position(struct dc *dc,
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_MAIN &&
-				pipe->stream->mall_stream_config.paired_stream == phantom_pipe->stream) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN &&
+				dc_state_get_paired_subvp_stream(context, pipe->stream) == phantom_pipe->stream) {
 			if (pipe->plane_state && pipe->plane_state->update_flags.bits.position_change) {
 
 				phantom_plane->src_rect.x = pipe->plane_state->src_rect.x;
@@ -1376,21 +1402,19 @@ void dcn32_update_phantom_vp_position(struct dc *dc,
 void dcn32_apply_update_flags_for_phantom(struct pipe_ctx *phantom_pipe)
 {
 	phantom_pipe->update_flags.raw = 0;
-	if (phantom_pipe->stream && phantom_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-		if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) {
-			phantom_pipe->update_flags.bits.enable = 1;
-			phantom_pipe->update_flags.bits.mpcc = 1;
-			phantom_pipe->update_flags.bits.dppclk = 1;
-			phantom_pipe->update_flags.bits.hubp_interdependent = 1;
-			phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
-			phantom_pipe->update_flags.bits.gamut_remap = 1;
-			phantom_pipe->update_flags.bits.scaler = 1;
-			phantom_pipe->update_flags.bits.viewport = 1;
-			phantom_pipe->update_flags.bits.det_size = 1;
-			if (resource_is_pipe_type(phantom_pipe, OTG_MASTER)) {
-				phantom_pipe->update_flags.bits.odm = 1;
-				phantom_pipe->update_flags.bits.global_sync = 1;
-			}
+	if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) {
+		phantom_pipe->update_flags.bits.enable = 1;
+		phantom_pipe->update_flags.bits.mpcc = 1;
+		phantom_pipe->update_flags.bits.dppclk = 1;
+		phantom_pipe->update_flags.bits.hubp_interdependent = 1;
+		phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
+		phantom_pipe->update_flags.bits.gamut_remap = 1;
+		phantom_pipe->update_flags.bits.scaler = 1;
+		phantom_pipe->update_flags.bits.viewport = 1;
+		phantom_pipe->update_flags.bits.det_size = 1;
+		if (resource_is_pipe_type(phantom_pipe, OTG_MASTER)) {
+			phantom_pipe->update_flags.bits.odm = 1;
+			phantom_pipe->update_flags.bits.global_sync = 1;
 		}
 	}
 }
@@ -1462,8 +1486,8 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
 		 * pipe, wait for the double buffer update to complete first before we do
 		 * ANY phantom pipe programming.
 		 */
-		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM &&
-				old_pipe->stream && old_pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+		if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM &&
+				old_pipe->stream && dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) != SUBVP_PHANTOM) {
 			old_pipe->stream_res.tg->funcs->wait_for_state(
 					old_pipe->stream_res.tg,
 					CRTC_STATE_VBLANK);
@@ -1475,7 +1499,7 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
 
-		if (new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (new_pipe->stream && dc_state_get_pipe_subvp_type(context, new_pipe) == SUBVP_PHANTOM) {
 			// If old context or new context has phantom pipes, apply
 			// the phantom timings now. We can't change the phantom
 			// pipe configuration safely without driver acquiring
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
index 427cfc8c24a4..427cfc8c24a4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h
index 89a591eb2c23..89a591eb2c23 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 5a8258287438..9c806385ecbd 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -56,6 +56,7 @@
 #include "dcn30/dcn30_cm_common.h"
 #include "dcn31/dcn31_hwseq.h"
 #include "dcn20/dcn20_hwseq.h"
+#include "dc_state_priv.h"
 
 #define DC_LOGGER_INIT(logger) \
 	struct dal_logger *dc_logger = logger
@@ -133,6 +134,7 @@ void dcn35_init_hw(struct dc *dc)
 	struct dc_bios *dcb = dc->ctx->dc_bios;
 	struct resource_pool *res_pool = dc->res_pool;
 	uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+	uint32_t user_level = MAX_BACKLIGHT_LEVEL;
 	int i;
 
 	if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
@@ -145,17 +147,36 @@ void dcn35_init_hw(struct dc *dc)
 		hws->funcs.bios_golden_init(dc);
 	}
 
-	REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
-	REG_WRITE(DCCG_GATE_DISABLE_CNTL2,  0);
-
-	/* Disable gating for PHYASYMCLK. This will be enabled in dccg if needed */
-	REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1,
-			PHYBSYMCLK_ROOT_GATE_DISABLE, 1,
-			PHYCSYMCLK_ROOT_GATE_DISABLE, 1,
-			PHYDSYMCLK_ROOT_GATE_DISABLE, 1,
-			PHYESYMCLK_ROOT_GATE_DISABLE, 1);
+	if (!dc->debug.disable_clock_gate) {
+		REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
+		REG_WRITE(DCCG_GATE_DISABLE_CNTL2,  0);
+
+		/* Disable gating for PHYASYMCLK. This will be enabled in dccg if needed */
+		REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1,
+				PHYBSYMCLK_ROOT_GATE_DISABLE, 1,
+				PHYCSYMCLK_ROOT_GATE_DISABLE, 1,
+				PHYDSYMCLK_ROOT_GATE_DISABLE, 1,
+				PHYESYMCLK_ROOT_GATE_DISABLE, 1);
+
+		REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL4,
+				DPIASYMCLK0_GATE_DISABLE, 0,
+				DPIASYMCLK1_GATE_DISABLE, 0,
+				DPIASYMCLK2_GATE_DISABLE, 0,
+				DPIASYMCLK3_GATE_DISABLE, 0);
+
+		REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0xFFFFFFFF);
+		REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5,
+				DTBCLK_P0_GATE_DISABLE, 0,
+				DTBCLK_P1_GATE_DISABLE, 0,
+				DTBCLK_P2_GATE_DISABLE, 0,
+				DTBCLK_P3_GATE_DISABLE, 0);
+		REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5,
+				DPSTREAMCLK0_GATE_DISABLE, 0,
+				DPSTREAMCLK1_GATE_DISABLE, 0,
+				DPSTREAMCLK2_GATE_DISABLE, 0,
+				DPSTREAMCLK3_GATE_DISABLE, 0);
 
-	REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0x1f7c3fcf);
+	}
 
 	// Initialize the dccg
 	if (res_pool->dccg->funcs->dccg_init)
@@ -260,13 +281,15 @@ void dcn35_init_hw(struct dc *dc)
 	for (i = 0; i < dc->link_count; i++) {
 		struct dc_link *link = dc->links[i];
 
-		if (link->panel_cntl)
+		if (link->panel_cntl) {
 			backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+			user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+		}
 	}
 	if (dc->ctx->dmub_srv) {
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (abms[i] != NULL && abms[i]->funcs != NULL)
-			abms[i]->funcs->abm_init(abms[i], backlight);
+			abms[i]->funcs->abm_init(abms[i], backlight, user_level);
 		}
 	}
 
@@ -332,9 +355,6 @@ void dcn35_init_hw(struct dc *dc)
 	if (dc->res_pool->pg_cntl) {
 		if (dc->res_pool->pg_cntl->funcs->init_pg_status)
 			dc->res_pool->pg_cntl->funcs->init_pg_status(dc->res_pool->pg_cntl);
-
-		if (dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22)
-			dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22(dc->res_pool->pg_cntl, false);
 	}
 }
 
@@ -671,11 +691,7 @@ bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable)
 	}
 
 	// TODO: review other cases when idle optimization is allowed
-
-	if (!enable)
-		dc_dmub_srv_exit_low_power_state(dc);
-	else
-		dc_dmub_srv_notify_idle(dc, enable);
+	dc_dmub_srv_apply_idle_power_optimizations(dc, enable);
 
 	return true;
 }
@@ -685,7 +701,7 @@ void dcn35_z10_restore(const struct dc *dc)
 	if (dc->debug.disable_z10)
 		return;
 
-	dc_dmub_srv_exit_low_power_state(dc);
+	dc_dmub_srv_apply_idle_power_optimizations(dc, false);
 
 	dcn31_z10_restore(dc);
 }
@@ -801,12 +817,12 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context)
 		dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
 
-		hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+		hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
 
 		if (tg->funcs->is_tg_enabled(tg))
 			tg->funcs->unlock(tg);
 
-		dc->hwss.disable_plane(dc, pipe_ctx);
+		dc->hwss.disable_plane(dc, context, pipe_ctx);
 
 		pipe_ctx->stream_res.tg = NULL;
 		pipe_ctx->plane_res.hubp = NULL;
@@ -933,10 +949,10 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
 	pipe_ctx->plane_state = NULL;
 }
 
-void dcn35_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn35_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
 {
 	struct dce_hwseq *hws = dc->hwseq;
-	bool is_phantom = pipe_ctx->plane_state && pipe_ctx->plane_state->is_phantom;
+	bool is_phantom = dc_state_get_pipe_subvp_type(state, pipe_ctx) == SUBVP_PHANTOM;
 	struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL;
 
 	DC_LOGGER_INIT(dc->ctx->logger);
@@ -963,6 +979,8 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
 	bool hpo_frl_stream_enc_acquired = false;
 	bool hpo_dp_stream_enc_acquired = false;
 	int i = 0, j = 0;
+	int edp_num = 0;
+	struct dc_link *edp_links[MAX_NUM_EDP] = { NULL };
 
 	memset(update_state, 0, sizeof(struct pg_block_update));
 
@@ -1003,10 +1021,24 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
 
 		if (pipe_ctx->stream_res.opp)
 			update_state->pg_pipe_res_update[PG_OPP][pipe_ctx->stream_res.opp->inst] = false;
+	}
+	/*domain24 controls all the otg, mpc, opp, as long as one otg is still up, avoid enabling OTG PG*/
+	for (i = 0; i < dc->res_pool->timing_generator_count; i++) {
+		struct timing_generator *tg = dc->res_pool->timing_generators[i];
+		if (tg && tg->funcs->is_tg_enabled(tg)) {
+			update_state->pg_pipe_res_update[PG_OPTC][i] = false;
+			break;
+		}
+	}
 
-		if (pipe_ctx->stream_res.tg)
-			update_state->pg_pipe_res_update[PG_OPTC][pipe_ctx->stream_res.tg->inst] = false;
+	dc_get_edp_links(dc, edp_links, &edp_num);
+	if (edp_num == 0 ||
+		((!edp_links[0] || !edp_links[0]->edp_sink_present) &&
+			(!edp_links[1] || !edp_links[1]->edp_sink_present))) {
+		/*eDP not exist on this config, keep Domain24 power on, for S0i3, this will be handled in dmubfw*/
+		update_state->pg_pipe_res_update[PG_OPTC][0] = false;
 	}
+
 }
 
 void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
@@ -1092,8 +1124,29 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
 
 }
 
-void dcn35_block_power_control(struct dc *dc,
-	struct pg_block_update *update_state, bool power_on)
+/**
+ * dcn35_hw_block_power_down() - power down sequence
+ *
+ * The following sequence describes the ON-OFF (ONO) for power down:
+ *
+ *	ONO Region 3, DCPG 25: hpo - SKIPPED
+ *	ONO Region 4, DCPG 0: dchubp0, dpp0
+ *	ONO Region 6, DCPG 1: dchubp1, dpp1
+ *	ONO Region 8, DCPG 2: dchubp2, dpp2
+ *	ONO Region 10, DCPG 3: dchubp3, dpp3
+ *	ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will pwr dwn at IPS2 entry
+ *	ONO Region 5, DCPG 16: dsc0
+ *	ONO Region 7, DCPG 17: dsc1
+ *	ONO Region 9, DCPG 18: dsc2
+ *	ONO Region 11, DCPG 19: dsc3
+ *	ONO Region 2, DCPG 24: mpc opp optc dwb
+ *	ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. will be pwr dwn after lono timer is armed
+ *
+ * @dc: Current DC state
+ * @update_state: update PG sequence states for HW block
+ */
+void dcn35_hw_block_power_down(struct dc *dc,
+	struct pg_block_update *update_state)
 {
 	int i = 0;
 	struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
@@ -1102,64 +1155,106 @@ void dcn35_block_power_control(struct dc *dc,
 		return;
 	if (dc->debug.ignore_pg)
 		return;
+
 	if (update_state->pg_res_update[PG_HPO]) {
 		if (pg_cntl->funcs->hpo_pg_control)
-			pg_cntl->funcs->hpo_pg_control(pg_cntl, power_on);
+			pg_cntl->funcs->hpo_pg_control(pg_cntl, false);
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
 			update_state->pg_pipe_res_update[PG_DPP][i]) {
 			if (pg_cntl->funcs->hubp_dpp_pg_control)
-				pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, power_on);
+				pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, false);
 		}
-
+	}
+	for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++)
 		if (update_state->pg_pipe_res_update[PG_DSC][i]) {
 			if (pg_cntl->funcs->dsc_pg_control)
-				pg_cntl->funcs->dsc_pg_control(pg_cntl, i, power_on);
+				pg_cntl->funcs->dsc_pg_control(pg_cntl, i, false);
 		}
 
-		if (update_state->pg_pipe_res_update[PG_MPCC][i]) {
-			if (pg_cntl->funcs->mpcc_pg_control)
-				pg_cntl->funcs->mpcc_pg_control(pg_cntl, i, power_on);
-		}
-
-		if (update_state->pg_pipe_res_update[PG_OPP][i]) {
-			if (pg_cntl->funcs->opp_pg_control)
-				pg_cntl->funcs->opp_pg_control(pg_cntl, i, power_on);
-		}
 
-		if (update_state->pg_pipe_res_update[PG_OPTC][i]) {
-			if (pg_cntl->funcs->optc_pg_control)
-				pg_cntl->funcs->optc_pg_control(pg_cntl, i, power_on);
-		}
-	}
+	/*this will need all the clients to unregister optc interruts let dmubfw handle this*/
+	if (pg_cntl->funcs->plane_otg_pg_control)
+		pg_cntl->funcs->plane_otg_pg_control(pg_cntl, false);
 
-	if (update_state->pg_res_update[PG_DWB]) {
-		if (pg_cntl->funcs->dwb_pg_control)
-			pg_cntl->funcs->dwb_pg_control(pg_cntl, power_on);
-	}
+	//domain22, 23, 25 currently always on.
 
-	if (pg_cntl->funcs->plane_otg_pg_control)
-		pg_cntl->funcs->plane_otg_pg_control(pg_cntl, power_on);
 }
 
-void dcn35_root_clock_control(struct dc *dc,
-	struct pg_block_update *update_state, bool power_on)
+/**
+ * dcn35_hw_block_power_up() - power up sequence
+ *
+ * The following sequence describes the ON-OFF (ONO) for power up:
+ *
+ *	ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED
+ *	ONO Region 2, DCPG 24: mpc opp optc dwb
+ *	ONO Region 5, DCPG 16: dsc0
+ *	ONO Region 7, DCPG 17: dsc1
+ *	ONO Region 9, DCPG 18: dsc2
+ *	ONO Region 11, DCPG 19: dsc3
+ *	ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will power up at IPS2 exit
+ *	ONO Region 4, DCPG 0: dchubp0, dpp0
+ *	ONO Region 6, DCPG 1: dchubp1, dpp1
+ *	ONO Region 8, DCPG 2: dchubp2, dpp2
+ *	ONO Region 10, DCPG 3: dchubp3, dpp3
+ *	ONO Region 3, DCPG 25: hpo - SKIPPED
+ *
+ * @dc: Current DC state
+ * @update_state: update PG sequence states for HW block
+ */
+void dcn35_hw_block_power_up(struct dc *dc,
+	struct pg_block_update *update_state)
 {
 	int i = 0;
 	struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
 
 	if (!pg_cntl)
 		return;
+	if (dc->debug.ignore_pg)
+		return;
+	//domain22, 23, 25 currently always on.
+	/*this will need all the clients to unregister optc interruts let dmubfw handle this*/
+	if (pg_cntl->funcs->plane_otg_pg_control)
+		pg_cntl->funcs->plane_otg_pg_control(pg_cntl, true);
+
+	for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++)
+		if (update_state->pg_pipe_res_update[PG_DSC][i]) {
+			if (pg_cntl->funcs->dsc_pg_control)
+				pg_cntl->funcs->dsc_pg_control(pg_cntl, i, true);
+		}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
 			update_state->pg_pipe_res_update[PG_DPP][i]) {
-			if (dc->hwseq->funcs.dpp_root_clock_control)
-				dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on);
+			if (pg_cntl->funcs->hubp_dpp_pg_control)
+				pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, true);
 		}
+	}
+	if (update_state->pg_res_update[PG_HPO]) {
+		if (pg_cntl->funcs->hpo_pg_control)
+			pg_cntl->funcs->hpo_pg_control(pg_cntl, true);
+	}
+}
+void dcn35_root_clock_control(struct dc *dc,
+	struct pg_block_update *update_state, bool power_on)
+{
+	int i = 0;
+	struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
 
+	if (!pg_cntl)
+		return;
+	/*enable root clock first when power up*/
+	if (power_on)
+		for (i = 0; i < dc->res_pool->pipe_count; i++) {
+			if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
+				update_state->pg_pipe_res_update[PG_DPP][i]) {
+				if (dc->hwseq->funcs.dpp_root_clock_control)
+					dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on);
+			}
+		}
+	for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) {
 		if (update_state->pg_pipe_res_update[PG_DSC][i]) {
 			if (power_on) {
 				if (dc->res_pool->dccg->funcs->enable_dsc)
@@ -1170,6 +1265,15 @@ void dcn35_root_clock_control(struct dc *dc,
 			}
 		}
 	}
+	/*disable root clock first when power down*/
+	if (!power_on)
+		for (i = 0; i < dc->res_pool->pipe_count; i++) {
+			if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
+				update_state->pg_pipe_res_update[PG_DPP][i]) {
+				if (dc->hwseq->funcs.dpp_root_clock_control)
+					dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on);
+			}
+		}
 }
 
 void dcn35_prepare_bandwidth(
@@ -1183,9 +1287,9 @@ void dcn35_prepare_bandwidth(
 
 		if (dc->hwss.root_clock_control)
 			dc->hwss.root_clock_control(dc, &pg_update_state, true);
-
-		if (dc->hwss.block_power_control)
-			dc->hwss.block_power_control(dc, &pg_update_state, true);
+		/*power up required HW block*/
+		if (dc->hwss.hw_block_power_up)
+			dc->hwss.hw_block_power_up(dc, &pg_update_state);
 	}
 
 	dcn20_prepare_bandwidth(dc, context);
@@ -1201,9 +1305,9 @@ void dcn35_optimize_bandwidth(
 
 	if (dc->hwss.calc_blocks_to_gate) {
 		dc->hwss.calc_blocks_to_gate(dc, context, &pg_update_state);
-
-		if (dc->hwss.block_power_control)
-			dc->hwss.block_power_control(dc, &pg_update_state, false);
+		/*try to power down unused block*/
+		if (dc->hwss.hw_block_power_down)
+			dc->hwss.hw_block_power_down(dc, &pg_update_state);
 
 		if (dc->hwss.root_clock_control)
 			dc->hwss.root_clock_control(dc, &pg_update_state, false);
@@ -1225,3 +1329,44 @@ uint32_t dcn35_get_idle_state(const struct dc *dc)
 
 	return 0;
 }
+
+void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
+		int num_pipes, struct dc_crtc_timing_adjust adjust)
+{
+	int i = 0;
+	struct drr_params params = {0};
+	// DRR set trigger event mapped to OTG_TRIG_A (bit 11) for manual control flow
+	unsigned int event_triggers = 0x800;
+	// Note DRR trigger events are generated regardless of whether num frames met.
+	unsigned int num_frames = 2;
+
+	params.vertical_total_max = adjust.v_total_max;
+	params.vertical_total_min = adjust.v_total_min;
+	params.vertical_total_mid = adjust.v_total_mid;
+	params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num;
+
+	for (i = 0; i < num_pipes; i++) {
+		if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) {
+			struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing;
+			struct dc *dc = pipe_ctx[i]->stream->ctx->dc;
+
+			if (dc->debug.static_screen_wait_frames) {
+				unsigned int frame_rate = timing->pix_clk_100hz / (timing->h_total * timing->v_total);
+
+				if (frame_rate >= 120 && dc->caps.ips_support &&
+					dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) {
+					/*ips enable case*/
+					num_frames = 2 * (frame_rate % 60);
+				}
+			}
+			if (pipe_ctx[i]->stream_res.tg->funcs->set_drr)
+				pipe_ctx[i]->stream_res.tg->funcs->set_drr(
+					pipe_ctx[i]->stream_res.tg, &params);
+			if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
+				if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control)
+					pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
+						pipe_ctx[i]->stream_res.tg,
+						event_triggers, num_frames);
+		}
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
index 0dff10d179b8..fd66316e33de 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
@@ -57,14 +57,16 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context);
 void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx);
 void dcn35_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx,
 			       struct dc_state *context);
-void dcn35_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn35_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 
 void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
 	struct pg_block_update *update_state);
 void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
 	struct pg_block_update *update_state);
-void dcn35_block_power_control(struct dc *dc,
-	struct pg_block_update *update_state, bool power_on);
+void dcn35_hw_block_power_up(struct dc *dc,
+	struct pg_block_update *update_state);
+void dcn35_hw_block_power_down(struct dc *dc,
+	struct pg_block_update *update_state);
 void dcn35_root_clock_control(struct dc *dc,
 	struct pg_block_update *update_state, bool power_on);
 
@@ -84,4 +86,8 @@ void dcn35_dsc_pg_control(
 
 void dcn35_set_idle_state(const struct dc *dc, bool allow_idle);
 uint32_t dcn35_get_idle_state(const struct dc *dc);
+
+void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
+		int num_pipes, struct dc_crtc_timing_adjust adjust);
+
 #endif /* __DC_HWSS_DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
index 296bf3a38cb9..a630aa77dcec 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
@@ -68,7 +68,7 @@ static const struct hw_sequencer_funcs dcn35_funcs = {
 	.prepare_bandwidth = dcn35_prepare_bandwidth,
 	.optimize_bandwidth = dcn35_optimize_bandwidth,
 	.update_bandwidth = dcn20_update_bandwidth,
-	.set_drr = dcn10_set_drr,
+	.set_drr = dcn35_set_drr,
 	.get_position = dcn10_get_position,
 	.set_static_screen_control = dcn30_set_static_screen_control,
 	.setup_stereo = dcn10_setup_stereo,
@@ -118,7 +118,8 @@ static const struct hw_sequencer_funcs dcn35_funcs = {
 	.update_dsc_pg = dcn32_update_dsc_pg,
 	.calc_blocks_to_gate = dcn35_calc_blocks_to_gate,
 	.calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate,
-	.block_power_control = dcn35_block_power_control,
+	.hw_block_power_up = dcn35_hw_block_power_up,
+	.hw_block_power_down = dcn35_hw_block_power_down,
 	.root_clock_control = dcn35_root_clock_control,
 	.set_idle_state = dcn35_set_idle_state,
 	.get_idle_state = dcn35_get_idle_state
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h
index b67015032c35..b67015032c35 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt
new file mode 100644
index 000000000000..951ca2da4486
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt
@@ -0,0 +1,4 @@
+dal3_subdirectory_sources(
+  dcn351_init.c
+  dcn351_init.h
+)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile
new file mode 100644
index 000000000000..b24ad27fe6ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile
@@ -0,0 +1,17 @@
+#
+# (c) Copyright 2022 Advanced Micro Devices, Inc. All the rights reserved
+#
+#  All rights reserved.  This notice is intended as a precaution against
+#  inadvertent publication and does not imply publication or any waiver
+#  of confidentiality.  The year included in the foregoing notice is the
+#  year of creation of the work.
+#
+#  Authors: AMD
+#
+# Makefile for DCN351.
+
+DCN351 = dcn351_init.o
+
+AMD_DAL_DCN351 = $(addprefix $(AMDDALPATH)/dc/dcn351/,$(DCN351))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCN351)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
new file mode 100644
index 000000000000..143d3fc0221c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
+#include "dcn20/dcn20_hwseq.h"
+#include "dcn21/dcn21_hwseq.h"
+#include "dcn30/dcn30_hwseq.h"
+#include "dcn301/dcn301_hwseq.h"
+#include "dcn31/dcn31_hwseq.h"
+#include "dcn32/dcn32_hwseq.h"
+#include "dcn35/dcn35_hwseq.h"
+
+#include "dcn351_init.h"
+
+static const struct hw_sequencer_funcs dcn351_funcs = {
+	.program_gamut_remap = dcn30_program_gamut_remap,
+	.init_hw = dcn35_init_hw,
+	.power_down_on_boot = dcn35_power_down_on_boot,
+	.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
+	.apply_ctx_for_surface = NULL,
+	.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+	.wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
+	.post_unlock_program_front_end = dcn20_post_unlock_program_front_end,
+	.update_plane_addr = dcn20_update_plane_addr,
+	.update_dchub = dcn10_update_dchub,
+	.update_pending_status = dcn10_update_pending_status,
+	.program_output_csc = dcn20_program_output_csc,
+	.enable_accelerated_mode = dce110_enable_accelerated_mode,
+	.enable_timing_synchronization = dcn10_enable_timing_synchronization,
+	.enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
+	.update_info_frame = dcn31_update_info_frame,
+	.send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
+	.enable_stream = dcn20_enable_stream,
+	.disable_stream = dce110_disable_stream,
+	.unblank_stream = dcn32_unblank_stream,
+	.blank_stream = dce110_blank_stream,
+	.enable_audio_stream = dce110_enable_audio_stream,
+	.disable_audio_stream = dce110_disable_audio_stream,
+	.disable_plane = dcn35_disable_plane,
+	.disable_pixel_data = dcn20_disable_pixel_data,
+	.pipe_control_lock = dcn20_pipe_control_lock,
+	.interdependent_update_lock = dcn10_lock_all_pipes,
+	.cursor_lock = dcn10_cursor_lock,
+	.prepare_bandwidth = dcn35_prepare_bandwidth,
+	.optimize_bandwidth = dcn35_optimize_bandwidth,
+	.update_bandwidth = dcn20_update_bandwidth,
+	.set_drr = dcn10_set_drr,
+	.get_position = dcn10_get_position,
+	.set_static_screen_control = dcn30_set_static_screen_control,
+	.setup_stereo = dcn10_setup_stereo,
+	.set_avmute = dcn30_set_avmute,
+	.log_hw_state = dcn10_log_hw_state,
+	.get_hw_state = dcn10_get_hw_state,
+	.clear_status_bits = dcn10_clear_status_bits,
+	.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
+	.edp_backlight_control = dce110_edp_backlight_control,
+	.edp_power_control = dce110_edp_power_control,
+	.edp_wait_for_T12 = dce110_edp_wait_for_T12,
+	.edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
+	.set_cursor_position = dcn10_set_cursor_position,
+	.set_cursor_attribute = dcn10_set_cursor_attribute,
+	.set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
+	.setup_periodic_interrupt = dcn10_setup_periodic_interrupt,
+	.set_clock = dcn10_set_clock,
+	.get_clock = dcn10_get_clock,
+	.program_triplebuffer = dcn20_program_triple_buffer,
+	.enable_writeback = dcn30_enable_writeback,
+	.disable_writeback = dcn30_disable_writeback,
+	.update_writeback = dcn30_update_writeback,
+	.mmhubbub_warmup = dcn30_mmhubbub_warmup,
+	.dmdata_status_done = dcn20_dmdata_status_done,
+	.program_dmdata_engine = dcn30_program_dmdata_engine,
+	.set_dmdata_attributes = dcn20_set_dmdata_attributes,
+	.init_sys_ctx = dcn31_init_sys_ctx,
+	.init_vm_ctx = dcn20_init_vm_ctx,
+	.set_flip_control_gsl = dcn20_set_flip_control_gsl,
+	.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
+	.calc_vupdate_position = dcn10_calc_vupdate_position,
+	.power_down = dce110_power_down,
+	.set_backlight_level = dcn21_set_backlight_level,
+	.set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
+	.set_pipe = dcn21_set_pipe,
+	.enable_lvds_link_output = dce110_enable_lvds_link_output,
+	.enable_tmds_link_output = dce110_enable_tmds_link_output,
+	.enable_dp_link_output = dce110_enable_dp_link_output,
+	.disable_link_output = dcn32_disable_link_output,
+	.z10_restore = dcn35_z10_restore,
+	.z10_save_init = dcn31_z10_save_init,
+	.set_disp_pattern_generator = dcn30_set_disp_pattern_generator,
+	.optimize_pwr_state = dcn21_optimize_pwr_state,
+	.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
+	.update_visual_confirm_color = dcn10_update_visual_confirm_color,
+	.apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations,
+	.update_dsc_pg = dcn32_update_dsc_pg,
+	.calc_blocks_to_gate = dcn35_calc_blocks_to_gate,
+	.calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate,
+	.hw_block_power_up = dcn35_hw_block_power_up,
+	.hw_block_power_down = dcn35_hw_block_power_down,
+	.root_clock_control = dcn35_root_clock_control,
+	.set_idle_state = dcn35_set_idle_state,
+	.get_idle_state = dcn35_get_idle_state
+};
+
+static const struct hwseq_private_funcs dcn351_private_funcs = {
+	.init_pipes = dcn35_init_pipes,
+	.update_plane_addr = dcn20_update_plane_addr,
+	.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
+	.update_mpcc = dcn20_update_mpcc,
+	.set_input_transfer_func = dcn32_set_input_transfer_func,
+	.set_output_transfer_func = dcn32_set_output_transfer_func,
+	.power_down = dce110_power_down,
+	.enable_display_power_gating = dcn10_dummy_display_power_gating,
+	.blank_pixel_data = dcn20_blank_pixel_data,
+	.reset_hw_ctx_wrap = dcn31_reset_hw_ctx_wrap,
+	.enable_stream_timing = dcn20_enable_stream_timing,
+	.edp_backlight_control = dce110_edp_backlight_control,
+	.setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt,
+	.did_underflow_occur = dcn10_did_underflow_occur,
+	.init_blank = dcn20_init_blank,
+	.disable_vga = NULL,
+	.bios_golden_init = dcn10_bios_golden_init,
+	.plane_atomic_disable = dcn35_plane_atomic_disable,
+	//.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/
+	//.hubp_pg_control = dcn35_hubp_pg_control,
+	.enable_power_gating_plane = dcn35_enable_power_gating_plane,
+	.dpp_root_clock_control = dcn35_dpp_root_clock_control,
+	.program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree,
+	.update_odm = dcn35_update_odm,
+	.set_hdr_multiplier = dcn10_set_hdr_multiplier,
+	.verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high,
+	.wait_for_blank_complete = dcn20_wait_for_blank_complete,
+	.dccg_init = dcn20_dccg_init,
+	.set_mcm_luts = dcn32_set_mcm_luts,
+	.setup_hpo_hw_control = dcn35_setup_hpo_hw_control,
+	.calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values,
+	.set_pixels_per_cycle = dcn32_set_pixels_per_cycle,
+	.is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy,
+	.dsc_pg_control = dcn35_dsc_pg_control,
+	.dsc_pg_status = dcn32_dsc_pg_status,
+	.enable_plane = dcn35_enable_plane,
+};
+
+void dcn351_hw_sequencer_construct(struct dc *dc)
+{
+	dc->hwss = dcn351_funcs;
+	dc->hwseq->funcs = dcn351_private_funcs;
+
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h
new file mode 100644
index 000000000000..970b01008b23
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_DCN351_INIT_H__
+#define __DC_DCN351_INIT_H__
+
+struct dc;
+
+void dcn351_hw_sequencer_construct(struct dc *dc);
+
+#endif /* __DC_DCN351_INIT_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
index 452680fe9aab..a54399383318 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
@@ -50,7 +50,7 @@ struct pg_block_update;
 struct subvp_pipe_control_lock_fast_params {
 	struct dc *dc;
 	bool lock;
-	struct pipe_ctx *pipe_ctx;
+	bool subvp_immediate_flip;
 };
 
 struct pipe_control_lock_params {
@@ -200,7 +200,7 @@ struct hw_sequencer_funcs {
 			struct dc_state *context);
 	enum dc_status (*apply_ctx_to_hw)(struct dc *dc,
 			struct dc_state *context);
-	void (*disable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx);
+	void (*disable_plane)(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
 	void (*disable_pixel_data)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool blank);
 	void (*apply_ctx_for_surface)(struct dc *dc,
 			const struct dc_stream_state *stream,
@@ -248,6 +248,7 @@ struct hw_sequencer_funcs {
 	void (*enable_per_frame_crtc_position_reset)(struct dc *dc,
 			int group_size, struct pipe_ctx *grouped_pipes[]);
 	void (*enable_timing_synchronization)(struct dc *dc,
+			struct dc_state *state,
 			int group_index, int group_size,
 			struct pipe_ctx *grouped_pipes[]);
 	void (*enable_vblanks_synchronization)(struct dc *dc,
@@ -414,8 +415,10 @@ struct hw_sequencer_funcs {
 		struct pg_block_update *update_state);
 	void (*calc_blocks_to_ungate)(struct dc *dc, struct dc_state *context,
 		struct pg_block_update *update_state);
-	void (*block_power_control)(struct dc *dc,
-		struct pg_block_update *update_state, bool power_on);
+	void (*hw_block_power_up)(struct dc *dc,
+		struct pg_block_update *update_state);
+	void (*hw_block_power_down)(struct dc *dc,
+		struct pg_block_update *update_state);
 	void (*root_clock_control)(struct dc *dc,
 		struct pg_block_update *update_state, bool power_on);
 	void (*set_idle_state)(const struct dc *dc, bool allow_idle);
@@ -452,17 +455,18 @@ void get_mpctree_visual_confirm_color(
 		struct tg_color *color);
 
 void get_subvp_visual_confirm_color(
-	struct dc *dc,
-	struct dc_state *context,
 	struct pipe_ctx *pipe_ctx,
 	struct tg_color *color);
 
 void get_mclk_switch_visual_confirm_color(
-		struct dc *dc,
-		struct dc_state *context,
 		struct pipe_ctx *pipe_ctx,
 		struct tg_color *color);
 
+void set_p_state_switch_method(
+		struct dc *dc,
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx);
+
 void hwss_execute_sequence(struct dc *dc,
 		struct block_sequence block_sequence[],
 		int num_steps);
@@ -472,7 +476,8 @@ void hwss_build_fast_sequence(struct dc *dc,
 		unsigned int dmub_cmd_count,
 		struct block_sequence block_sequence[],
 		int *num_steps,
-		struct pipe_ctx *pipe_ctx);
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_status *stream_status);
 
 void hwss_send_dmcub_cmd(union block_sequence_params *params);
 
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
index 82c592166875..6137cf09aa54 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
@@ -79,6 +79,7 @@ struct hwseq_private_funcs {
 	void (*update_plane_addr)(const struct dc *dc,
 			struct pipe_ctx *pipe_ctx);
 	void (*plane_atomic_disconnect)(struct dc *dc,
+			struct dc_state *state,
 			struct pipe_ctx *pipe_ctx);
 	void (*update_mpcc)(struct dc *dc, struct pipe_ctx *pipe_ctx);
 	bool (*set_input_transfer_func)(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index bac1420b1de8..f74ae0d41d3c 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -200,11 +200,8 @@ struct resource_funcs {
 			unsigned int pipe_cnt,
             unsigned int index);
 
-	bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context, bool fast_update);
-	void (*retain_phantom_pipes)(struct dc *dc, struct dc_state *context);
 	void (*get_panel_config_defaults)(struct dc_panel_config *panel_config);
-	void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
-	void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
+	void (*build_pipe_pix_clk_params)(struct pipe_ctx *pipe_ctx);
 };
 
 struct audio_support{
@@ -384,6 +381,16 @@ union pipe_update_flags {
 	uint32_t raw;
 };
 
+enum p_state_switch_method {
+	P_STATE_UNKNOWN						= 0,
+	P_STATE_V_BLANK						= 1,
+	P_STATE_FPO,
+	P_STATE_V_ACTIVE,
+	P_STATE_SUB_VP,
+	P_STATE_DRR_SUB_VP,
+	P_STATE_V_BLANK_SUB_VP
+};
+
 struct pipe_ctx {
 	struct dc_plane_state *plane_state;
 	struct dc_stream_state *stream;
@@ -432,6 +439,7 @@ struct pipe_ctx {
 	struct dwbc *dwbc;
 	struct mcif_wb *mcif_wb;
 	union pipe_update_flags update_flags;
+	enum p_state_switch_method p_state_type;
 	struct tg_color visual_confirm_color;
 	bool has_vactive_margin;
 	/* subvp_index: only valid if the pipe is a SUBVP_MAIN*/
@@ -525,6 +533,14 @@ struct dc_state {
 	 * @stream_status: Planes status on a given stream
 	 */
 	struct dc_stream_status stream_status[MAX_PIPES];
+	/**
+	 * @phantom_streams: Stream state properties for phantoms
+	 */
+	struct dc_stream_state *phantom_streams[MAX_PHANTOM_PIPES];
+	/**
+	 * @phantom_planes: Planes state properties for phantoms
+	 */
+	struct dc_plane_state *phantom_planes[MAX_PHANTOM_PIPES];
 
 	/**
 	 * @stream_count: Total of streams in use
@@ -533,6 +549,14 @@ struct dc_state {
 	uint8_t stream_mask;
 
 	/**
+	 * @stream_count: Total phantom streams in use
+	 */
+	uint8_t phantom_stream_count;
+	/**
+	 * @stream_count: Total phantom planes in use
+	 */
+	uint8_t phantom_plane_count;
+	/**
 	 * @res_ctx: Persistent state of resources
 	 */
 	struct resource_context res_ctx;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
index 33db15d69f23..3f0161d64675 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
@@ -36,7 +36,7 @@ struct abm {
 };
 
 struct abm_funcs {
-	void (*abm_init)(struct abm *abm, uint32_t back_light);
+	void (*abm_init)(struct abm *abm, uint32_t back_light, uint32_t user_level);
 	bool (*set_abm_level)(struct abm *abm, unsigned int abm_level);
 	bool (*set_abm_immediate_disable)(struct abm *abm, unsigned int panel_inst);
 	bool (*set_pipe)(struct abm *abm, unsigned int controller_id, unsigned int panel_inst);
@@ -64,7 +64,8 @@ struct abm_funcs {
 	bool (*set_pipe_ex)(struct abm *abm,
 			unsigned int otg_inst,
 			unsigned int option,
-			unsigned int panel_inst);
+			unsigned int panel_inst,
+			unsigned int pwrseq_inst);
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index fa9614bcb160..cbba39d251e5 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -62,6 +62,25 @@ struct dcn3_clk_internal {
 	uint32_t CLK4_CLK0_CURRENT_CNT; //fclk
 };
 
+struct dcn35_clk_internal {
+	int dummy;
+	uint32_t CLK1_CLK0_CURRENT_CNT; //dispclk
+	uint32_t CLK1_CLK1_CURRENT_CNT; //dppclk
+	uint32_t CLK1_CLK2_CURRENT_CNT; //dprefclk
+	uint32_t CLK1_CLK3_CURRENT_CNT; //dcfclk
+	uint32_t CLK1_CLK4_CURRENT_CNT; //dtbclk
+	//uint32_t CLK1_CLK5_CURRENT_CNT; //dpiaclk
+	//uint32_t CLK1_CLK6_CURRENT_CNT; //srdbgclk
+	uint32_t CLK1_CLK3_DS_CNTL;	    //dcf_deep_sleep_divider
+	uint32_t CLK1_CLK3_ALLOW_DS;	//dcf_deep_sleep_allow
+
+	uint32_t CLK1_CLK0_BYPASS_CNTL; //dispclk bypass
+	uint32_t CLK1_CLK1_BYPASS_CNTL; //dppclk bypass
+	uint32_t CLK1_CLK2_BYPASS_CNTL; //dprefclk bypass
+	uint32_t CLK1_CLK3_BYPASS_CNTL; //dcfclk bypass
+	uint32_t CLK1_CLK4_BYPASS_CNTL; //dtbclk bypass
+};
+
 struct dcn301_clk_internal {
 	int dummy;
 	uint32_t CLK1_CLK0_CURRENT_CNT; //dispclk
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
index ce2f0c0e82bd..b9a06bf84cc9 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
@@ -59,8 +59,8 @@ enum dentist_dispclk_change_mode {
 struct dp_dto_params {
 	int otg_inst;
 	enum signal_type signal;
-	long long pixclk_hz;
-	long long refclk_hz;
+	uint64_t pixclk_hz;
+	uint64_t refclk_hz;
 };
 
 enum pixel_rate_div {
@@ -201,6 +201,10 @@ struct dccg_funcs {
 			struct dccg *dccg,
 			enum streamclk_source src,
 			uint32_t otg_inst);
+	void (*set_dto_dscclk)(
+			struct dccg *dccg,
+			uint32_t dsc_inst);
+	void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst);
 };
 
 #endif //__DAL_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
index 86b711dcc785..729ca0064e94 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
@@ -188,6 +188,10 @@ struct dwbc_funcs {
 	bool (*is_enabled)(
 		struct dwbc *dwbc);
 
+	void (*set_fc_enable)(
+		struct dwbc *dwbc,
+		enum dwb_frame_capture_enable enable);
+
 	void (*set_stereo)(
 		struct dwbc *dwbc,
 		struct dwb_stereo_params *stereo_params);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
index b95ae9596c3b..dcae23faeee3 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
@@ -43,6 +43,7 @@
  * to be used inside loops and for determining array sizes.
  */
 #define MAX_PIPES 6
+#define MAX_PHANTOM_PIPES (MAX_PIPES / 2)
 #define MAX_DIG_LINK_ENCODERS 7
 #define MAX_DWB_PIPES	1
 #define MAX_HPO_DP2_ENCODERS	4
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
index 24af9d80b937..5dcbaa2db964 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
@@ -40,6 +40,7 @@ struct panel_cntl_backlight_registers {
 	unsigned int BL_PWM_PERIOD_CNTL;
 	unsigned int LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV;
 	unsigned int PANEL_PWRSEQ_REF_DIV2;
+	unsigned int USER_LEVEL;
 };
 
 struct panel_cntl_funcs {
@@ -56,12 +57,14 @@ struct panel_cntl_funcs {
 struct panel_cntl_init_data {
 	struct dc_context *ctx;
 	uint32_t inst;
+	uint32_t pwrseq_inst;
 };
 
 struct panel_cntl {
 	const struct panel_cntl_funcs *funcs;
 	struct dc_context *ctx;
 	uint32_t inst;
+	uint32_t pwrseq_inst;
 	/* registers setting needs to be saved and restored at InitBacklight */
 	struct panel_cntl_backlight_registers stored_backlight_registers;
 };
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
index b9812afb886b..00ea3864dd4d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
@@ -47,8 +47,6 @@ struct pg_cntl_funcs {
 	void (*optc_pg_control)(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on);
 	void (*dwb_pg_control)(struct pg_cntl *pg_cntl, bool power_on);
 	void (*init_pg_status)(struct pg_cntl *pg_cntl);
-
-	void (*set_force_poweron_domain22)(struct pg_cntl *pg_cntl, bool power_on);
 };
 
 #endif //__DC_PG_CNTL_H__
diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h
index d7685368140a..26fe81f213da 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/link.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/link.h
@@ -281,11 +281,16 @@ struct link_service {
 			const unsigned int *power_opts);
 	bool (*edp_setup_replay)(struct dc_link *link,
 			const struct dc_stream_state *stream);
+	bool (*edp_send_replay_cmd)(struct dc_link *link,
+			enum replay_FW_Message_type msg,
+			union dmub_replay_cmd_set *cmd_data);
 	bool (*edp_set_coasting_vtotal)(
 			struct dc_link *link, uint16_t coasting_vtotal);
 	bool (*edp_replay_residency)(const struct dc_link *link,
 			unsigned int *residency, const bool is_start,
 			const bool is_alpm);
+	bool (*edp_set_replay_power_opt_and_coasting_vtotal)(struct dc_link *link,
+			const unsigned int *power_opts, uint16_t coasting_vtotal);
 
 	bool (*edp_wait_for_t12)(struct dc_link *link);
 	bool (*edp_is_ilr_optimization_required)(struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index 06ca8bfb91e7..1d51fed12e20 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -501,6 +501,18 @@ int recource_find_free_pipe_not_used_in_cur_res_ctx(
 		const struct resource_pool *pool);
 
 /*
+ * Look for a free pipe in new resource context that is used in current resource
+ * context as an OTG master pipe.
+ *
+ * return - FREE_PIPE_INDEX_NOT_FOUND if free pipe is not found, otherwise
+ * pipe idx of the free pipe
+ */
+int recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
+		const struct resource_context *cur_res_ctx,
+		struct resource_context *new_res_ctx,
+		const struct resource_pool *pool);
+
+/*
  * Look for a free pipe in new resource context that is used as a secondary DPP
  * pipe in any MPCC combine in current resource context.
  * return - FREE_PIPE_INDEX_NOT_FOUND if free pipe is not found, otherwise
@@ -561,9 +573,6 @@ void update_audio_usage(
 
 unsigned int resource_pixel_format_to_bpp(enum surface_pixel_format format);
 
-void get_audio_check(struct audio_info *aud_modes,
-	struct audio_check *aud_chk);
-
 bool get_temp_dp_link_res(struct dc_link *link,
 		struct link_resource *link_res,
 		struct dc_link_settings *link_settings);
@@ -600,6 +609,9 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy(
 		struct pipe_ctx *sec_pipe,
 		bool odm);
 
+bool resource_subvp_in_use(struct dc *dc,
+		struct dc_state *context);
+
 /* A test harness interface that modifies dp encoder resources in the given dc
  * state and bypasses the need to revalidate. The interface assumes that the
  * test harness interface is called with pre-validated link config stored in the
@@ -610,5 +622,4 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc,
 		struct pipe_ctx *pipe_ctx);
 
 bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream);
-
 #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
index f2fe523f914f..24153b0df503 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
@@ -879,7 +879,7 @@ static bool detect_link_and_local_sink(struct dc_link *link,
 			(link->dpcd_sink_ext_caps.bits.oled == 1)) {
 			dpcd_set_source_specific_data(link);
 			msleep(post_oui_delay);
-			set_cached_brightness_aux(link);
+			set_default_brightness_aux(link);
 		}
 
 		return true;
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index 34a4a8c0e18c..5fe8b4871c77 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -776,10 +776,26 @@ static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable)
  */
 void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 {
+	/* TODO: Move this to HWSS as this is hardware programming sequence not a
+	 * link layer sequence
+	 */
 	struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
+	struct dc *dc = pipe_ctx->stream->ctx->dc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct pipe_ctx *odm_pipe;
 	int opp_cnt = 1;
+	struct dccg *dccg = dc->res_pool->dccg;
+	/* It has been found that when DSCCLK is lower than 16Mhz, we will get DCN
+	 * register access hung. When DSCCLk is based on refclk, DSCCLk is always a
+	 * fixed value higher than 16Mhz so the issue doesn't occur. When DSCCLK is
+	 * generated by DTO, DSCCLK would be based on 1/3 dispclk. For small timings
+	 * with DSC such as 480p60Hz, the dispclk could be low enough to trigger
+	 * this problem. We are implementing a workaround here to keep using dscclk
+	 * based on fixed value refclk when timing is smaller than 3x16Mhz (i.e
+	 * 48Mhz) pixel clock to avoid hitting this problem.
+	 */
+	bool should_use_dto_dscclk = (dccg->funcs->set_dto_dscclk != NULL) &&
+			stream->timing.pix_clk_100hz > 480000;
 	DC_LOGGER_INIT(dsc->ctx->logger);
 
 	for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
@@ -802,11 +818,15 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 
 		dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg);
 		dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst);
+		if (should_use_dto_dscclk)
+			dccg->funcs->set_dto_dscclk(dccg, dsc->inst);
 		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
 			struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc;
 
 			odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg);
 			odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst);
+			if (should_use_dto_dscclk)
+				dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst);
 		}
 		dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt;
 		dsc_cfg.pic_width *= opp_cnt;
@@ -856,9 +876,14 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 		}
 
 		/* disable DSC block */
+		if (dccg->funcs->set_ref_dscclk)
+			dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst);
 		pipe_ctx->stream_res.dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc);
-		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
+		for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
+			if (dccg->funcs->set_ref_dscclk)
+				dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst);
 			odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc);
+		}
 	}
 }
 
@@ -1057,18 +1082,21 @@ static struct fixed31_32 get_pbn_from_bw_in_kbps(uint64_t kbps)
 	uint32_t denominator = 1;
 
 	/*
-	 * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006
+	 * The 1.006 factor (margin 5300ppm + 300ppm ~ 0.6% as per spec) is not
+	 * required when determining PBN/time slot utilization on the link between
+	 * us and the branch, since that overhead is already accounted for in
+	 * the get_pbn_per_slot function.
+	 *
 	 * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on
 	 * common multiplier to render an integer PBN for all link rate/lane
 	 * counts combinations
 	 * calculate
-	 * peak_kbps *= (1006/1000)
 	 * peak_kbps *= (64/54)
-	 * peak_kbps *= 8    convert to bytes
+	 * peak_kbps /= (8 * 1000) convert to bytes
 	 */
 
-	numerator = 64 * PEAK_FACTOR_X1000;
-	denominator = 54 * 8 * 1000 * 1000;
+	numerator = 64;
+	denominator = 54 * 8 * 1000;
 	kbps *= numerator;
 	peak_kbps = dc_fixpt_from_fraction(kbps, denominator);
 
@@ -1247,86 +1275,6 @@ static void remove_stream_from_alloc_table(
 	}
 }
 
-static enum dc_status deallocate_mst_payload_with_temp_drm_wa(
-		struct pipe_ctx *pipe_ctx)
-{
-	struct dc_stream_state *stream = pipe_ctx->stream;
-	struct dc_link *link = stream->link;
-	struct dc_dp_mst_stream_allocation_table proposed_table = {0};
-	struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0);
-	int i;
-	bool mst_mode = (link->type == dc_connection_mst_branch);
-	/* adjust for drm changes*/
-	const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
-	const struct dc_link_settings empty_link_settings = {0};
-	DC_LOGGER_INIT(link->ctx->logger);
-
-	if (link_hwss->ext.set_throttled_vcp_size)
-		link_hwss->ext.set_throttled_vcp_size(pipe_ctx, avg_time_slots_per_mtp);
-	if (link_hwss->ext.set_hblank_min_symbol_width)
-		link_hwss->ext.set_hblank_min_symbol_width(pipe_ctx,
-				&empty_link_settings,
-				avg_time_slots_per_mtp);
-
-	if (dm_helpers_dp_mst_write_payload_allocation_table(
-			stream->ctx,
-			stream,
-			&proposed_table,
-			false))
-		update_mst_stream_alloc_table(
-				link,
-				pipe_ctx->stream_res.stream_enc,
-				pipe_ctx->stream_res.hpo_dp_stream_enc,
-				&proposed_table);
-	else
-		DC_LOG_WARNING("Failed to update"
-				"MST allocation table for"
-				"pipe idx:%d\n",
-				pipe_ctx->pipe_idx);
-
-	DC_LOG_MST("%s"
-			"stream_count: %d: ",
-			__func__,
-			link->mst_stream_alloc_table.stream_count);
-
-	for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
-		DC_LOG_MST("stream_enc[%d]: %p      "
-		"stream[%d].hpo_dp_stream_enc: %p      "
-		"stream[%d].vcp_id: %d      "
-		"stream[%d].slot_count: %d\n",
-		i,
-		(void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
-		i,
-		(void *) link->mst_stream_alloc_table.stream_allocations[i].hpo_dp_stream_enc,
-		i,
-		link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
-		i,
-		link->mst_stream_alloc_table.stream_allocations[i].slot_count);
-	}
-
-	if (link_hwss->ext.update_stream_allocation_table == NULL ||
-			link_dp_get_encoding_format(&link->cur_link_settings) == DP_UNKNOWN_ENCODING) {
-		DC_LOG_DEBUG("Unknown encoding format\n");
-		return DC_ERROR_UNEXPECTED;
-	}
-
-	link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res,
-			&link->mst_stream_alloc_table);
-
-	if (mst_mode) {
-		dm_helpers_dp_mst_poll_for_allocation_change_trigger(
-			stream->ctx,
-			stream);
-	}
-
-	dm_helpers_dp_mst_send_payload_allocation(
-			stream->ctx,
-			stream,
-			false);
-
-	return DC_OK;
-}
-
 static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
 {
 	struct dc_stream_state *stream = pipe_ctx->stream;
@@ -1339,9 +1287,6 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
 	const struct dc_link_settings empty_link_settings = {0};
 	DC_LOGGER_INIT(link->ctx->logger);
 
-	if (link->dc->debug.temp_mst_deallocation_sequence)
-		return deallocate_mst_payload_with_temp_drm_wa(pipe_ctx);
-
 	/* deallocate_mst_payload is called before disable link. When mode or
 	 * disable/enable monitor, new stream is created which is not in link
 	 * stream[] yet. For this, payload is not allocated yet, so de-alloc
@@ -1414,16 +1359,14 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
 	link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res,
 			&link->mst_stream_alloc_table);
 
-	if (mst_mode) {
+	if (mst_mode)
 		dm_helpers_dp_mst_poll_for_allocation_change_trigger(
 			stream->ctx,
 			stream);
 
-		dm_helpers_dp_mst_send_payload_allocation(
-				stream->ctx,
-				stream,
-				false);
-	}
+	dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+			stream->ctx,
+			stream);
 
 	return DC_OK;
 }
@@ -1504,12 +1447,10 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx)
 			stream->ctx,
 			stream);
 
-	if (ret != ACT_LINK_LOST) {
+	if (ret != ACT_LINK_LOST)
 		dm_helpers_dp_mst_send_payload_allocation(
 				stream->ctx,
-				stream,
-				true);
-	}
+				stream);
 
 	/* slot X.Y for only current stream */
 	pbn_per_slot = get_pbn_per_slot(stream);
@@ -1769,8 +1710,7 @@ enum dc_status link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_in
 	/* send ALLOCATE_PAYLOAD sideband message with updated pbn */
 	dm_helpers_dp_mst_send_payload_allocation(
 			stream->ctx,
-			stream,
-			true);
+			stream);
 
 	/* notify immediate branch device table update */
 	if (dm_helpers_dp_mst_write_payload_allocation_table(
@@ -1899,8 +1839,7 @@ enum dc_status link_increase_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_
 		/* send ALLOCATE_PAYLOAD sideband message with updated pbn */
 		dm_helpers_dp_mst_send_payload_allocation(
 				stream->ctx,
-				stream,
-				true);
+				stream);
 	}
 
 	/* increase throttled vcp size */
@@ -2142,8 +2081,7 @@ static enum dc_status enable_link_dp(struct dc_state *state,
 	if (link->dpcd_sink_ext_caps.bits.oled == 1 ||
 		link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1 ||
 		link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1) {
-		set_cached_brightness_aux(link);
-
+		set_default_brightness_aux(link);
 		if (link->dpcd_sink_ext_caps.bits.oled == 1)
 			msleep(bl_oled_enable_delay);
 		edp_backlight_enable_aux(link, true);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
index 7abfc67d10a6..37d3027c32dc 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
@@ -213,8 +213,10 @@ static void construct_link_service_edp_panel_control(struct link_service *link_s
 	link_srv->edp_get_replay_state = edp_get_replay_state;
 	link_srv->edp_set_replay_allow_active = edp_set_replay_allow_active;
 	link_srv->edp_setup_replay = edp_setup_replay;
+	link_srv->edp_send_replay_cmd = edp_send_replay_cmd;
 	link_srv->edp_set_coasting_vtotal = edp_set_coasting_vtotal;
 	link_srv->edp_replay_residency = edp_replay_residency;
+	link_srv->edp_set_replay_power_opt_and_coasting_vtotal = edp_set_replay_power_opt_and_coasting_vtotal;
 
 	link_srv->edp_wait_for_t12 = edp_wait_for_t12;
 	link_srv->edp_is_ilr_optimization_required =
@@ -368,6 +370,30 @@ static enum transmitter translate_encoder_to_transmitter(
 	}
 }
 
+static uint8_t translate_dig_inst_to_pwrseq_inst(struct dc_link *link)
+{
+	uint8_t pwrseq_inst = 0xF;
+	struct dc_context *dc_ctx = link->dc->ctx;
+
+	DC_LOGGER_INIT(dc_ctx->logger);
+
+	switch (link->eng_id) {
+	case ENGINE_ID_DIGA:
+		pwrseq_inst = 0;
+		break;
+	case ENGINE_ID_DIGB:
+		pwrseq_inst = 1;
+		break;
+	default:
+		DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", link->eng_id);
+		ASSERT(false);
+		break;
+	}
+
+	return pwrseq_inst;
+}
+
+
 static void link_destruct(struct dc_link *link)
 {
 	int i;
@@ -595,24 +621,6 @@ static bool construct_phy(struct dc_link *link,
 	link->ddc_hw_inst =
 		dal_ddc_get_line(get_ddc_pin(link->ddc));
 
-
-	if (link->dc->res_pool->funcs->panel_cntl_create &&
-		(link->link_id.id == CONNECTOR_ID_EDP ||
-			link->link_id.id == CONNECTOR_ID_LVDS)) {
-		panel_cntl_init_data.ctx = dc_ctx;
-		panel_cntl_init_data.inst =
-			panel_cntl_init_data.ctx->dc_edp_id_count;
-		link->panel_cntl =
-			link->dc->res_pool->funcs->panel_cntl_create(
-								&panel_cntl_init_data);
-		panel_cntl_init_data.ctx->dc_edp_id_count++;
-
-		if (link->panel_cntl == NULL) {
-			DC_ERROR("Failed to create link panel_cntl!\n");
-			goto panel_cntl_create_fail;
-		}
-	}
-
 	enc_init_data.ctx = dc_ctx;
 	bp_funcs->get_src_obj(dc_ctx->dc_bios, link->link_id, 0,
 			      &enc_init_data.encoder);
@@ -643,6 +651,23 @@ static bool construct_phy(struct dc_link *link,
 	link->dc->res_pool->dig_link_enc_count++;
 
 	link->link_enc_hw_inst = link->link_enc->transmitter;
+
+	if (link->dc->res_pool->funcs->panel_cntl_create &&
+		(link->link_id.id == CONNECTOR_ID_EDP ||
+			link->link_id.id == CONNECTOR_ID_LVDS)) {
+		panel_cntl_init_data.ctx = dc_ctx;
+		panel_cntl_init_data.inst = panel_cntl_init_data.ctx->dc_edp_id_count;
+		panel_cntl_init_data.pwrseq_inst = translate_dig_inst_to_pwrseq_inst(link);
+		link->panel_cntl =
+			link->dc->res_pool->funcs->panel_cntl_create(
+								&panel_cntl_init_data);
+		panel_cntl_init_data.ctx->dc_edp_id_count++;
+
+		if (link->panel_cntl == NULL) {
+			DC_ERROR("Failed to create link panel_cntl!\n");
+			goto panel_cntl_create_fail;
+		}
+	}
 	for (i = 0; i < 4; i++) {
 		if (bp_funcs->get_device_tag(dc_ctx->dc_bios,
 					     link->link_id, i,
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.h b/drivers/gpu/drm/amd/display/dc/link/link_validation.h
index 4a954317d0da..595fb05946e9 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.h
@@ -25,6 +25,7 @@
 #ifndef __LINK_VALIDATION_H__
 #define __LINK_VALIDATION_H__
 #include "link.h"
+
 enum dc_status link_validate_mode_timing(
 		const struct dc_stream_state *stream,
 		struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
index db87aa7b5c90..289f5d133342 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
@@ -412,12 +412,18 @@ static enum dc_link_rate get_cable_max_link_rate(struct dc_link *link)
 {
 	enum dc_link_rate cable_max_link_rate = LINK_RATE_UNKNOWN;
 
-	if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20)
+	if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20) {
 		cable_max_link_rate = LINK_RATE_UHBR20;
-	else if (link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY)
+	} else if (link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY) {
 		cable_max_link_rate = LINK_RATE_UHBR13_5;
-	else if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR10)
-		cable_max_link_rate = LINK_RATE_UHBR10;
+	} else if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR10) {
+		// allow DP40 cables to do UHBR13.5 for passive or unknown cable type
+		if (link->dpcd_caps.cable_id.bits.CABLE_TYPE < 2) {
+			cable_max_link_rate = LINK_RATE_UHBR13_5;
+		} else {
+			cable_max_link_rate = LINK_RATE_UHBR10;
+		}
+	}
 
 	return cable_max_link_rate;
 }
@@ -1392,7 +1398,7 @@ static bool get_usbc_cable_id(struct dc_link *link, union dp_cable_id *cable_id)
 	cmd.cable_id.header.payload_bytes = sizeof(cmd.cable_id.data);
 	cmd.cable_id.data.input.phy_inst = resource_transmitter_to_phy_idx(
 			link->dc, link->link_enc->transmitter);
-	if (dm_execute_dmub_cmd(link->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	if (dc_wake_and_execute_dmub_cmd(link->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
 			cmd.cable_id.header.ret_status == 1) {
 		cable_id->raw = cmd.cable_id.data.output_raw;
 		DC_LOG_DC("usbc_cable_id = %d.\n", cable_id->raw);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
index 0bb749133909..982eda3c46f5 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
@@ -90,7 +90,8 @@ bool dpia_query_hpd_status(struct dc_link *link)
 	cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA;
 
 	/* Return HPD status reported by DMUB if query successfully executed. */
-	if (dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && cmd.query_hpd.data.status == AUX_RET_SUCCESS)
+	if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	    cmd.query_hpd.data.status == AUX_RET_SUCCESS)
 		is_hpd_high = cmd.query_hpd.data.result;
 
 	DC_LOG_DEBUG("%s: link(%d) dpia(%d) cmd_status(%d) result(%d)\n",
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
index 7581023daa47..a7aa8c9da868 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
@@ -50,6 +50,7 @@ static bool get_bw_alloc_proceed_flag(struct dc_link *tmp)
 			&& tmp->hpd_status
 			&& tmp->dpia_bw_alloc_config.bw_alloc_enabled);
 }
+
 static void reset_bw_alloc_struct(struct dc_link *link)
 {
 	link->dpia_bw_alloc_config.bw_alloc_enabled = false;
@@ -58,7 +59,13 @@ static void reset_bw_alloc_struct(struct dc_link *link)
 	link->dpia_bw_alloc_config.estimated_bw = 0;
 	link->dpia_bw_alloc_config.bw_granularity = 0;
 	link->dpia_bw_alloc_config.response_ready = false;
+	link->dpia_bw_alloc_config.sink_allocated_bw = 0;
 }
+
+#define BW_GRANULARITY_0 4 // 0.25 Gbps
+#define BW_GRANULARITY_1 2 // 0.5 Gbps
+#define BW_GRANULARITY_2 1 // 1 Gbps
+
 static uint8_t get_bw_granularity(struct dc_link *link)
 {
 	uint8_t bw_granularity = 0;
@@ -71,16 +78,20 @@ static uint8_t get_bw_granularity(struct dc_link *link)
 
 	switch (bw_granularity & 0x3) {
 	case 0:
-		bw_granularity = 4;
+		bw_granularity = BW_GRANULARITY_0;
 		break;
 	case 1:
+		bw_granularity = BW_GRANULARITY_1;
+		break;
+	case 2:
 	default:
-		bw_granularity = 2;
+		bw_granularity = BW_GRANULARITY_2;
 		break;
 	}
 
 	return bw_granularity;
 }
+
 static int get_estimated_bw(struct dc_link *link)
 {
 	uint8_t bw_estimated_bw = 0;
@@ -93,31 +104,33 @@ static int get_estimated_bw(struct dc_link *link)
 
 	return bw_estimated_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
 }
-static bool allocate_usb4_bw(int *stream_allocated_bw, int bw_needed, struct dc_link *link)
+
+static int get_non_reduced_max_link_rate(struct dc_link *link)
 {
-	if (bw_needed > 0)
-		*stream_allocated_bw += bw_needed;
+	uint8_t nrd_max_link_rate = 0;
 
-	return true;
+	core_link_read_dpcd(
+			link,
+			DP_TUNNELING_MAX_LINK_RATE,
+			&nrd_max_link_rate,
+			sizeof(uint8_t));
+
+	return nrd_max_link_rate;
 }
-static bool deallocate_usb4_bw(int *stream_allocated_bw, int bw_to_dealloc, struct dc_link *link)
-{
-	bool ret = false;
 
-	if (*stream_allocated_bw > 0) {
-		*stream_allocated_bw -= bw_to_dealloc;
-		ret = true;
-	} else {
-		//Do nothing for now
-		ret = true;
-	}
+static int get_non_reduced_max_lane_count(struct dc_link *link)
+{
+	uint8_t nrd_max_lane_count = 0;
 
-	// Unplug so reset values
-	if (!link->hpd_status)
-		reset_bw_alloc_struct(link);
+	core_link_read_dpcd(
+			link,
+			DP_TUNNELING_MAX_LANE_COUNT,
+			&nrd_max_lane_count,
+			sizeof(uint8_t));
 
-	return ret;
+	return nrd_max_lane_count;
 }
+
 /*
  * Read all New BW alloc configuration ex: estimated_bw, allocated_bw,
  * granuality, Driver_ID, CM_Group, & populate the BW allocation structs
@@ -125,10 +138,22 @@ static bool deallocate_usb4_bw(int *stream_allocated_bw, int bw_to_dealloc, stru
  */
 static void init_usb4_bw_struct(struct dc_link *link)
 {
-	// Init the known values
+	reset_bw_alloc_struct(link);
+
+	/* init the known values */
 	link->dpia_bw_alloc_config.bw_granularity = get_bw_granularity(link);
 	link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link);
+	link->dpia_bw_alloc_config.nrd_max_link_rate = get_non_reduced_max_link_rate(link);
+	link->dpia_bw_alloc_config.nrd_max_lane_count = get_non_reduced_max_lane_count(link);
+
+	DC_LOG_DEBUG("%s: bw_granularity(%d), estimated_bw(%d)\n",
+		__func__, link->dpia_bw_alloc_config.bw_granularity,
+		link->dpia_bw_alloc_config.estimated_bw);
+	DC_LOG_DEBUG("%s: nrd_max_link_rate(%d), nrd_max_lane_count(%d)\n",
+		__func__, link->dpia_bw_alloc_config.nrd_max_link_rate,
+		link->dpia_bw_alloc_config.nrd_max_lane_count);
 }
+
 static uint8_t get_lowest_dpia_index(struct dc_link *link)
 {
 	const struct dc *dc_struct = link->dc;
@@ -141,51 +166,66 @@ static uint8_t get_lowest_dpia_index(struct dc_link *link)
 				dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
 			continue;
 
-		if (idx > dc_struct->links[i]->link_index)
+		if (idx > dc_struct->links[i]->link_index) {
 			idx = dc_struct->links[i]->link_index;
+			break;
+		}
 	}
 
 	return idx;
 }
+
 /*
- * Get the Max Available BW or Max Estimated BW for each Host Router
+ * Get the maximum dp tunnel banwidth of host router
  *
- * @link: pointer to the dc_link struct instance
- * @type: ESTIMATD BW or MAX AVAILABLE BW
+ * @dc: pointer to the dc struct instance
+ * @hr_index: host router index
  *
- * return: response_ready flag from dc_link struct
+ * return: host router maximum dp tunnel bandwidth
  */
-static int get_host_router_total_bw(struct dc_link *link, uint8_t type)
+static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_index)
 {
-	const struct dc *dc_struct = link->dc;
-	uint8_t lowest_dpia_index = get_lowest_dpia_index(link);
-	uint8_t idx = (link->link_index - lowest_dpia_index) / 2, idx_temp = 0;
-	struct dc_link *link_temp;
+	uint8_t lowest_dpia_index = get_lowest_dpia_index(dc->links[0]);
+	uint8_t hr_index_temp = 0;
+	struct dc_link *link_dpia_primary, *link_dpia_secondary;
 	int total_bw = 0;
-	int i;
-
-	for (i = 0; i < MAX_PIPES * 2; ++i) {
 
-		if (!dc_struct->links[i] || dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
-			continue;
+	for (uint8_t i = 0; i < MAX_PIPES * 2; ++i) {
 
-		link_temp = dc_struct->links[i];
-		if (!link_temp || !link_temp->hpd_status)
+		if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
 			continue;
 
-		idx_temp = (link_temp->link_index - lowest_dpia_index) / 2;
-
-		if (idx_temp == idx) {
-
-			if (type == HOST_ROUTER_BW_ESTIMATED)
-				total_bw += link_temp->dpia_bw_alloc_config.estimated_bw;
-			else if (type == HOST_ROUTER_BW_ALLOCATED)
-				total_bw += link_temp->dpia_bw_alloc_config.sink_allocated_bw;
+		hr_index_temp = (dc->links[i]->link_index - lowest_dpia_index) / 2;
+
+		if (hr_index_temp == hr_index) {
+			link_dpia_primary = dc->links[i];
+			link_dpia_secondary = dc->links[i + 1];
+
+			/**
+			 * If BW allocation enabled on both DPIAs, then
+			 * HR BW = Estimated(dpia_primary) + Allocated(dpia_secondary)
+			 * otherwise HR BW = Estimated(bw alloc enabled dpia)
+			 */
+			if ((link_dpia_primary->hpd_status &&
+				link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) &&
+				(link_dpia_secondary->hpd_status &&
+				link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) {
+				total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw +
+					link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw;
+			} else if (link_dpia_primary->hpd_status &&
+					link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) {
+				total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw;
+			} else if (link_dpia_secondary->hpd_status &&
+				link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled) {
+				total_bw += link_dpia_secondary->dpia_bw_alloc_config.estimated_bw;
+			}
+			break;
 		}
 	}
 
 	return total_bw;
 }
+
 /*
  * Cleanup function for when the dpia is unplugged to reset struct
  * and perform any required clean up
@@ -194,42 +234,49 @@ static int get_host_router_total_bw(struct dc_link *link, uint8_t type)
  *
  * return: none
  */
-static bool dpia_bw_alloc_unplug(struct dc_link *link)
+static void dpia_bw_alloc_unplug(struct dc_link *link)
 {
-	if (!link)
-		return true;
-
-	return deallocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw,
-			link->dpia_bw_alloc_config.sink_allocated_bw, link);
+	if (link) {
+		DC_LOG_DEBUG("%s: resetting bw alloc config for link(%d)\n",
+			__func__, link->link_index);
+		reset_bw_alloc_struct(link);
+	}
 }
+
 static void set_usb4_req_bw_req(struct dc_link *link, int req_bw)
 {
 	uint8_t requested_bw;
 	uint32_t temp;
 
-	// 1. Add check for this corner case #1
-	if (req_bw > link->dpia_bw_alloc_config.estimated_bw)
+	/* Error check whether request bw greater than allocated */
+	if (req_bw > link->dpia_bw_alloc_config.estimated_bw) {
+		DC_LOG_ERROR("%s: Request bw greater than estimated bw for link(%d)\n",
+			__func__, link->link_index);
 		req_bw = link->dpia_bw_alloc_config.estimated_bw;
+	}
 
 	temp = req_bw * link->dpia_bw_alloc_config.bw_granularity;
 	requested_bw = temp / Kbps_TO_Gbps;
 
-	// Always make sure to add more to account for floating points
+	/* Always make sure to add more to account for floating points */
 	if (temp % Kbps_TO_Gbps)
 		++requested_bw;
 
-	// 2. Add check for this corner case #2
+	/* Error check whether requested and allocated are equal */
 	req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
-	if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw)
-		return;
+	if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) {
+		DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n",
+			__func__, link->link_index);
+	}
 
-	if (core_link_write_dpcd(
+	link->dpia_bw_alloc_config.response_ready = false; // Reset flag
+	core_link_write_dpcd(
 		link,
 		REQUESTED_BW,
 		&requested_bw,
-		sizeof(uint8_t)) == DC_OK)
-		link->dpia_bw_alloc_config.response_ready = false; // Reset flag
+		sizeof(uint8_t));
 }
+
 /*
  * Return the response_ready flag from dc_link struct
  *
@@ -241,6 +288,7 @@ static bool get_cm_response_ready_flag(struct dc_link *link)
 {
 	return link->dpia_bw_alloc_config.response_ready;
 }
+
 // ------------------------------------------------------------------
 //					PUBLIC FUNCTIONS
 // ------------------------------------------------------------------
@@ -277,27 +325,27 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link)
 				DPTX_BW_ALLOCATION_MODE_CONTROL,
 				&response,
 				sizeof(uint8_t)) != DC_OK) {
-			DC_LOG_DEBUG("%s: **** FAILURE Enabling DPtx BW Allocation Mode Support ***\n",
-					__func__);
+			DC_LOG_DEBUG("%s: FAILURE Enabling DPtx BW Allocation Mode Support for link(%d)\n",
+				__func__, link->link_index);
 		} else {
 			// SUCCESS Enabled DPtx BW Allocation Mode Support
-			link->dpia_bw_alloc_config.bw_alloc_enabled = true;
-			DC_LOG_DEBUG("%s: **** SUCCESS Enabling DPtx BW Allocation Mode Support ***\n",
-					__func__);
+			DC_LOG_DEBUG("%s: SUCCESS Enabling DPtx BW Allocation Mode Support for link(%d)\n",
+				__func__, link->link_index);
 
 			ret = true;
 			init_usb4_bw_struct(link);
+			link->dpia_bw_alloc_config.bw_alloc_enabled = true;
 		}
 	}
 
 out:
 	return ret;
 }
+
 void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t result)
 {
 	int bw_needed = 0;
 	int estimated = 0;
-	int host_router_total_estimated_bw = 0;
 
 	if (!get_bw_alloc_proceed_flag((link)))
 		return;
@@ -306,14 +354,22 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
 
 	case DPIA_BW_REQ_FAILED:
 
-		DC_LOG_DEBUG("%s: *** *** BW REQ FAILURE for DP-TX Request *** ***\n", __func__);
+		/*
+		 * Ideally, we shouldn't run into this case as we always validate available
+		 * bandwidth and request within that limit
+		 */
+		estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
+
+		DC_LOG_ERROR("%s: BW REQ FAILURE for DP-TX Request for link(%d)\n",
+			__func__, link->link_index);
+		DC_LOG_ERROR("%s: current estimated_bw(%d), new estimated_bw(%d)\n",
+			__func__, link->dpia_bw_alloc_config.estimated_bw, estimated);
 
-		// Update the new Estimated BW value updated by CM
-		link->dpia_bw_alloc_config.estimated_bw =
-				bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
+		/* Update the new Estimated BW value updated by CM */
+		link->dpia_bw_alloc_config.estimated_bw = estimated;
 
+		/* Allocate the previously requested bandwidth */
 		set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.estimated_bw);
-		link->dpia_bw_alloc_config.response_ready = false;
 
 		/*
 		 * If FAIL then it is either:
@@ -326,68 +382,34 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
 
 	case DPIA_BW_REQ_SUCCESS:
 
-		DC_LOG_DEBUG("%s: *** BW REQ SUCCESS for DP-TX Request ***\n", __func__);
-
-		// 1. SUCCESS 1st time before any Pruning is done
-		// 2. SUCCESS after prev. FAIL before any Pruning is done
-		// 3. SUCCESS after Pruning is done but before enabling link
-
 		bw_needed = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
 
-		// 1.
-		if (!link->dpia_bw_alloc_config.sink_allocated_bw) {
-
-			allocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed, link);
-			link->dpia_bw_alloc_config.sink_verified_bw =
-					link->dpia_bw_alloc_config.sink_allocated_bw;
+		DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n",
+			__func__, link->link_index);
+		DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n",
+			__func__, link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed);
 
-			// SUCCESS from first attempt
-			if (link->dpia_bw_alloc_config.sink_allocated_bw >
-			link->dpia_bw_alloc_config.sink_max_bw)
-				link->dpia_bw_alloc_config.sink_verified_bw =
-						link->dpia_bw_alloc_config.sink_max_bw;
-		}
-		// 3.
-		else if (link->dpia_bw_alloc_config.sink_allocated_bw) {
-
-			// Find out how much do we need to de-alloc
-			if (link->dpia_bw_alloc_config.sink_allocated_bw > bw_needed)
-				deallocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw,
-						link->dpia_bw_alloc_config.sink_allocated_bw - bw_needed, link);
-			else
-				allocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw,
-						bw_needed - link->dpia_bw_alloc_config.sink_allocated_bw, link);
-		}
-
-		// 4. If this is the 2nd sink then any unused bw will be reallocated to master DPIA
-		// => check if estimated_bw changed
+		link->dpia_bw_alloc_config.sink_allocated_bw = bw_needed;
 
 		link->dpia_bw_alloc_config.response_ready = true;
 		break;
 
 	case DPIA_EST_BW_CHANGED:
 
-		DC_LOG_DEBUG("%s: *** ESTIMATED BW CHANGED for DP-TX Request ***\n", __func__);
-
 		estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
-		host_router_total_estimated_bw = get_host_router_total_bw(link, HOST_ROUTER_BW_ESTIMATED);
 
-		// 1. If due to unplug of other sink
-		if (estimated == host_router_total_estimated_bw) {
-			// First update the estimated & max_bw fields
-			if (link->dpia_bw_alloc_config.estimated_bw < estimated)
-				link->dpia_bw_alloc_config.estimated_bw = estimated;
-		}
-		// 2. If due to realloc bw btw 2 dpia due to plug OR realloc unused Bw
-		else {
-			// We lost estimated bw usually due to plug event of other dpia
-			link->dpia_bw_alloc_config.estimated_bw = estimated;
-		}
+		DC_LOG_DEBUG("%s: ESTIMATED BW CHANGED for link(%d)\n",
+			__func__, link->link_index);
+		DC_LOG_DEBUG("%s: current estimated_bw(%d), new estimated_bw(%d)\n",
+			__func__, link->dpia_bw_alloc_config.estimated_bw, estimated);
+
+		link->dpia_bw_alloc_config.estimated_bw = estimated;
 		break;
 
 	case DPIA_BW_ALLOC_CAPS_CHANGED:
 
-		DC_LOG_DEBUG("%s: *** BW ALLOC CAPABILITY CHANGED for DP-TX Request ***\n", __func__);
+		DC_LOG_ERROR("%s: BW ALLOC CAPABILITY CHANGED to Disabled for link(%d)\n",
+			__func__, link->link_index);
 		link->dpia_bw_alloc_config.bw_alloc_enabled = false;
 		break;
 	}
@@ -409,17 +431,17 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
 		set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw);
 
 		do {
-			if (!(timeout > 0))
+			if (timeout > 0)
 				timeout--;
 			else
 				break;
-			fsleep(10 * 1000);
+			msleep(10);
 		} while (!get_cm_response_ready_flag(link));
 
 		if (!timeout)
 			ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
 		else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
-			ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED);
+			ret = link->dpia_bw_alloc_config.sink_allocated_bw;
 	}
 	//2. Cold Unplug
 	else if (!link->hpd_status)
@@ -428,63 +450,74 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
 out:
 	return ret;
 }
-int link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw)
+
+bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw)
 {
-	int ret = 0;
+	bool ret = false;
 	uint8_t timeout = 10;
 
+	DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n",
+		__func__, link->link_index, link->hpd_status,
+		link->dpia_bw_alloc_config.sink_allocated_bw, req_bw);
+
 	if (!get_bw_alloc_proceed_flag(link))
 		goto out;
 
-	/*
-	 * Sometimes stream uses same timing parameters as the already
-	 * allocated max sink bw so no need to re-alloc
-	 */
-	if (req_bw != link->dpia_bw_alloc_config.sink_allocated_bw) {
-		set_usb4_req_bw_req(link, req_bw);
-		do {
-			if (!(timeout > 0))
-				timeout--;
-			else
-				break;
-			udelay(10 * 1000);
-		} while (!get_cm_response_ready_flag(link));
+	set_usb4_req_bw_req(link, req_bw);
+	do {
+		if (timeout > 0)
+			timeout--;
+		else
+			break;
+		msleep(10);
+	} while (!get_cm_response_ready_flag(link));
 
-		if (!timeout)
-			ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
-		else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
-			ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED);
-	}
+	if (timeout)
+		ret = true;
 
 out:
+	DC_LOG_DEBUG("%s: EXIT: timeout(%d), ret(%d)\n", __func__, timeout, ret);
 	return ret;
 }
+
 bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias)
 {
 	bool ret = true;
-	int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 };
-	uint8_t lowest_dpia_index = 0, dpia_index = 0;
-	uint8_t i;
+	int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 }, host_router_total_dp_bw = 0;
+	uint8_t lowest_dpia_index, i, hr_index;
 
 	if (!num_dpias || num_dpias > MAX_DPIA_NUM)
 		return ret;
 
-	//Get total Host Router BW & Validate against each Host Router max BW
+	lowest_dpia_index = get_lowest_dpia_index(link[0]);
+
+	/* get total Host Router BW with granularity for the given modes */
 	for (i = 0; i < num_dpias; ++i) {
+		int granularity_Gbps = 0;
+		int bw_granularity = 0;
 
 		if (!link[i]->dpia_bw_alloc_config.bw_alloc_enabled)
 			continue;
 
-		lowest_dpia_index = get_lowest_dpia_index(link[i]);
 		if (link[i]->link_index < lowest_dpia_index)
 			continue;
 
-		dpia_index = (link[i]->link_index - lowest_dpia_index) / 2;
-		bw_needed_per_hr[dpia_index] += bw_needed_per_dpia[i];
-		if (bw_needed_per_hr[dpia_index] > get_host_router_total_bw(link[i], HOST_ROUTER_BW_ALLOCATED)) {
+		granularity_Gbps = (Kbps_TO_Gbps / link[i]->dpia_bw_alloc_config.bw_granularity);
+		bw_granularity = (bw_needed_per_dpia[i] / granularity_Gbps) * granularity_Gbps +
+				((bw_needed_per_dpia[i] % granularity_Gbps) ? granularity_Gbps : 0);
 
-			ret = false;
-			break;
+		hr_index = (link[i]->link_index - lowest_dpia_index) / 2;
+		bw_needed_per_hr[hr_index] += bw_granularity;
+	}
+
+	/* validate against each Host Router max BW */
+	for (hr_index = 0; hr_index < MAX_HR_NUM; ++hr_index) {
+		if (bw_needed_per_hr[hr_index]) {
+			host_router_total_dp_bw = get_host_router_total_dp_tunnel_bw(link[0]->dc, hr_index);
+			if (bw_needed_per_hr[hr_index] > host_router_total_dp_bw) {
+				ret = false;
+				break;
+			}
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
index 7292690383ae..981bc4eb6120 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
@@ -59,9 +59,9 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link);
  * @link: pointer to the dc_link struct instance
  * @req_bw: Bw requested by the stream
  *
- * return: allocated bw else return 0
+ * return: true if allocated successfully
  */
-int link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw);
+bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw);
 
 /*
  * Handle the USB4 BW Allocation related functionality here:
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
index 0c00e94e90b1..ba69874be5a4 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
@@ -190,9 +190,6 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link)
 	/*AMD Replay version reuse DP_PSR_ERROR_STATUS for REPLAY_ERROR status.*/
 	union psr_error_status replay_error_status;
 
-	if (link->replay_settings.config.force_disable_desync_error_check)
-		return;
-
 	if (!link->replay_settings.replay_feature_enabled)
 		return;
 
@@ -210,9 +207,6 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link)
 		&replay_error_status.raw,
 		sizeof(replay_error_status.raw));
 
-	if (replay_configuration.bits.DESYNC_ERROR_STATUS)
-		link->replay_settings.config.received_desync_error_hpd = 1;
-
 	link->replay_settings.config.replay_error_status.bits.LINK_CRC_ERROR =
 		replay_error_status.bits.LINK_CRC_ERROR;
 	link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR =
@@ -225,6 +219,12 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link)
 		link->replay_settings.config.replay_error_status.bits.STATE_TRANSITION_ERROR) {
 		bool allow_active;
 
+		if (link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR)
+			link->replay_settings.config.received_desync_error_hpd = 1;
+
+		if (link->replay_settings.config.force_disable_desync_error_check)
+			return;
+
 		/* Acknowledge and clear configuration bits */
 		dm_helpers_dp_write_dpcd(
 			link->ctx,
@@ -265,7 +265,7 @@ void dp_handle_link_loss(struct dc_link *link)
 
 	for (i = count - 1; i >= 0; i--) {
 		// Always use max settings here for DP 1.4a LL Compliance CTS
-		if (link->is_automated) {
+		if (link->skip_fallback_on_link_loss) {
 			pipes[i]->link_config.dp_link_settings.lane_count =
 					link->verified_link_cap.lane_count;
 			pipes[i]->link_config.dp_link_settings.link_rate =
@@ -404,7 +404,9 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link,
 
 	if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.AUTOMATED_TEST) {
 		// Workaround for DP 1.4a LL Compliance CTS as USB4 has to share encoders unlike DP and USBC
-		link->is_automated = true;
+		if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+			link->skip_fallback_on_link_loss = true;
+
 		device_service_clear.bits.AUTOMATED_TEST = 1;
 		core_link_write_dpcd(
 			link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
index 90339c2dfd84..5a0b04518956 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
@@ -807,7 +807,7 @@ void dp_decide_lane_settings(
 		const struct link_training_settings *lt_settings,
 		const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
 		struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
-		union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX])
+		union dpcd_training_lane *dpcd_lane_settings)
 {
 	uint32_t lane;
 
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
index 7d027bac8255..851bd17317a0 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
@@ -111,7 +111,7 @@ void dp_decide_lane_settings(
 	const struct link_training_settings *lt_settings,
 	const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
 	struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
-	union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]);
+	union dpcd_training_lane *dpcd_lane_settings);
 
 enum dc_dp_training_pattern decide_cr_training_pattern(
 		const struct dc_link_settings *link_settings);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
index 4f4e899e5c46..e8dda44b23cb 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
@@ -811,7 +811,7 @@ static enum link_training_result dpia_training_eq_transparent(
 			/* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4
 			 * has to share encoders unlike DP and USBC
 			 */
-			if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->is_automated && retries_eq)) {
+			if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->skip_fallback_on_link_loss && retries_eq)) {
 				result =  LINK_TRAINING_SUCCESS;
 				break;
 			}
@@ -1037,7 +1037,7 @@ enum link_training_result dpia_perform_link_training(
 	 */
 	if (result == LINK_TRAINING_SUCCESS) {
 		fsleep(5000);
-		if (!link->is_automated)
+		if (!link->skip_fallback_on_link_loss)
 			result = dp_check_link_loss_status(link, &lt_settings);
 	} else if (result == LINK_TRAINING_ABORT)
 		dpia_training_abort(link, &lt_settings, repeater_id);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
index fd8f6f198146..7087cdc9e977 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
@@ -115,7 +115,7 @@ static enum link_training_result perform_fixed_vs_pe_nontransparent_training_seq
 		lt_settings->cr_pattern_time = 16000;
 
 	/* Fixed VS/PE specific: Toggle link rate */
-	apply_toggle_rate_wa = (link->vendor_specific_lttpr_link_rate_wa == target_rate);
+	apply_toggle_rate_wa = ((link->vendor_specific_lttpr_link_rate_wa == target_rate) || (link->vendor_specific_lttpr_link_rate_wa == 0));
 	target_rate = get_dpcd_link_rate(&lt_settings->link_settings);
 	toggle_rate = (target_rate == 0x6) ? 0xA : 0x6;
 
@@ -205,6 +205,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy(
 	const uint8_t vendor_lttpr_write_data_4lane_3[4] = {0x1, 0x6D, 0xF2, 0x18};
 	const uint8_t vendor_lttpr_write_data_4lane_4[4] = {0x1, 0x6C, 0xF2, 0x03};
 	const uint8_t vendor_lttpr_write_data_4lane_5[4] = {0x1, 0x03, 0xF3, 0x06};
+	const uint8_t vendor_lttpr_write_data_dpmf[4] = {0x1, 0x6, 0x70, 0x87};
 	enum link_training_result status = LINK_TRAINING_SUCCESS;
 	uint8_t lane = 0;
 	union down_spread_ctrl downspread = {0};
@@ -271,7 +272,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy(
 	/* Vendor specific: Toggle link rate */
 	toggle_rate = (rate == 0x6) ? 0xA : 0x6;
 
-	if (link->vendor_specific_lttpr_link_rate_wa == rate) {
+	if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) {
 		core_link_write_dpcd(
 				link,
 				DP_LINK_BW_SET,
@@ -293,6 +294,10 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy(
 		DP_DOWNSPREAD_CTRL,
 		lt_settings->link_settings.link_spread);
 
+	link_configure_fixed_vs_pe_retimer(link->ddc,
+			&vendor_lttpr_write_data_dpmf[0],
+			sizeof(vendor_lttpr_write_data_dpmf));
+
 	if (lt_settings->link_settings.lane_count == LANE_COUNT_FOUR) {
 		link_configure_fixed_vs_pe_retimer(link->ddc,
 				&vendor_lttpr_write_data_4lane_1[0], sizeof(vendor_lttpr_write_data_4lane_1));
@@ -552,6 +557,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
 	const uint8_t vendor_lttpr_write_data_4lane_3[4] = {0x1, 0x6D, 0xF2, 0x18};
 	const uint8_t vendor_lttpr_write_data_4lane_4[4] = {0x1, 0x6C, 0xF2, 0x03};
 	const uint8_t vendor_lttpr_write_data_4lane_5[4] = {0x1, 0x03, 0xF3, 0x06};
+	const uint8_t vendor_lttpr_write_data_dpmf[4] = {0x1, 0x6, 0x70, 0x87};
 	enum link_training_result status = LINK_TRAINING_SUCCESS;
 	uint8_t lane = 0;
 	union down_spread_ctrl downspread = {0};
@@ -617,7 +623,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
 	/* Vendor specific: Toggle link rate */
 	toggle_rate = (rate == 0x6) ? 0xA : 0x6;
 
-	if (link->vendor_specific_lttpr_link_rate_wa == rate) {
+	if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) {
 		core_link_write_dpcd(
 				link,
 				DP_LINK_BW_SET,
@@ -639,6 +645,10 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
 		DP_DOWNSPREAD_CTRL,
 		lt_settings->link_settings.link_spread);
 
+	link_configure_fixed_vs_pe_retimer(link->ddc,
+			&vendor_lttpr_write_data_dpmf[0],
+			sizeof(vendor_lttpr_write_data_dpmf));
+
 	if (lt_settings->link_settings.lane_count == LANE_COUNT_FOUR) {
 		link_configure_fixed_vs_pe_retimer(link->ddc,
 				&vendor_lttpr_write_data_4lane_1[0], sizeof(vendor_lttpr_write_data_4lane_1));
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
index e32a7974a4bc..7f1196528218 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
@@ -170,7 +170,6 @@ bool edp_set_backlight_level_nits(struct dc_link *link,
 	*(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits;
 	*(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms;
 
-	link->backlight_settings.backlight_millinits = backlight_millinits;
 
 	if (!link->dpcd_caps.panel_luminance_control) {
 		if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL,
@@ -290,7 +289,7 @@ bool set_default_brightness_aux(struct dc_link *link)
 			default_backlight = 150000;
 		// if < 1 nits or > 5000, it might be wrong readback
 		if (default_backlight < 1000 || default_backlight > 5000000)
-			default_backlight = 150000; //
+			default_backlight = 150000;
 
 		return edp_set_backlight_level_nits(link, true,
 				default_backlight, 0);
@@ -298,15 +297,6 @@ bool set_default_brightness_aux(struct dc_link *link)
 	return false;
 }
 
-bool set_cached_brightness_aux(struct dc_link *link)
-{
-	if (link->backlight_settings.backlight_millinits)
-		return edp_set_backlight_level_nits(link, true,
-						    link->backlight_settings.backlight_millinits, 0);
-	else
-		return set_default_brightness_aux(link);
-	return false;
-}
 bool edp_is_ilr_optimization_enabled(struct dc_link *link)
 {
 	if (link->dpcd_caps.edp_supported_link_rates_count == 0 || !link->panel_config.ilr.optimize_edp_link_rate)
@@ -539,6 +529,9 @@ bool edp_set_backlight_level(const struct dc_link *link,
 	if (dc_is_embedded_signal(link->connector_signal)) {
 		struct pipe_ctx *pipe_ctx = get_pipe_from_link(link);
 
+		if (link->panel_cntl)
+			link->panel_cntl->stored_backlight_registers.USER_LEVEL = backlight_pwm_u16_16;
+
 		if (pipe_ctx) {
 			/* Disable brightness ramping when the display is blanked
 			 * as it can hang the DMCU
@@ -1007,6 +1000,36 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
 	return true;
 }
 
+/*
+ * This is general Interface for Replay to set an 32 bit variable to dmub
+ * replay_FW_Message_type: Indicates which instruction or variable pass to DMUB
+ * cmd_data: Value of the config.
+ */
+bool edp_send_replay_cmd(struct dc_link *link,
+			enum replay_FW_Message_type msg,
+			union dmub_replay_cmd_set *cmd_data)
+{
+	struct dc *dc = link->ctx->dc;
+	struct dmub_replay *replay = dc->res_pool->replay;
+	unsigned int panel_inst;
+
+	if (!replay)
+		return false;
+
+	DC_LOGGER_INIT(link->ctx->logger);
+
+	if (dc_get_edp_link_panel_inst(dc, link, &panel_inst))
+		cmd_data->panel_inst = panel_inst;
+	else {
+		DC_LOG_DC("%s(): get edp panel inst fail ", __func__);
+		return false;
+	}
+
+	replay->funcs->replay_send_cmd(replay, msg, cmd_data);
+
+	return true;
+}
+
 bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal)
 {
 	struct dc *dc = link->ctx->dc;
@@ -1045,6 +1068,33 @@ bool edp_replay_residency(const struct dc_link *link,
 	return true;
 }
 
+bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link,
+	const unsigned int *power_opts, uint16_t coasting_vtotal)
+{
+	struct dc  *dc = link->ctx->dc;
+	struct dmub_replay *replay = dc->res_pool->replay;
+	unsigned int panel_inst;
+
+	if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
+		return false;
+
+	/* Only both power and coasting vtotal changed, this func could return true */
+	if (power_opts && link->replay_settings.replay_power_opt_active != *power_opts &&
+		coasting_vtotal && link->replay_settings.coasting_vtotal != coasting_vtotal) {
+		if (link->replay_settings.replay_feature_enabled &&
+			replay->funcs->replay_set_power_opt_and_coasting_vtotal) {
+			replay->funcs->replay_set_power_opt_and_coasting_vtotal(replay,
+				*power_opts, panel_inst, coasting_vtotal);
+			link->replay_settings.replay_power_opt_active = *power_opts;
+			link->replay_settings.coasting_vtotal = coasting_vtotal;
+		} else
+			return false;
+	} else
+		return false;
+
+	return true;
+}
+
 static struct abm *get_abm_from_stream_res(const struct dc_link *link)
 {
 	int i;
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
index ebf7deb63d13..34e521af7bb4 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
@@ -30,7 +30,6 @@
 enum dp_panel_mode dp_get_panel_mode(struct dc_link *link);
 void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode);
 bool set_default_brightness_aux(struct dc_link *link);
-bool set_cached_brightness_aux(struct dc_link *link);
 void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd);
 int edp_get_backlight_level(const struct dc_link *link);
 bool edp_get_backlight_level_nits(struct dc_link *link,
@@ -57,10 +56,15 @@ bool edp_set_replay_allow_active(struct dc_link *dc_link, const bool *enable,
 	bool wait, bool force_static, const unsigned int *power_opts);
 bool edp_setup_replay(struct dc_link *link,
 		const struct dc_stream_state *stream);
+bool edp_send_replay_cmd(struct dc_link *link,
+			enum replay_FW_Message_type msg,
+			union dmub_replay_cmd_set *cmd_data);
 bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal);
 bool edp_replay_residency(const struct dc_link *link,
 	unsigned int *residency, const bool is_start, const bool is_alpm);
 bool edp_get_replay_state(const struct dc_link *link, uint64_t *state);
+bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link,
+	const unsigned int *power_opts, uint16_t coasting_vtotal);
 bool edp_wait_for_t12(struct dc_link *link);
 bool edp_is_ilr_optimization_required(struct dc_link *link,
        struct dc_crtc_timing *crtc_timing);
diff --git a/drivers/gpu/drm/amd/display/dc/optc/Makefile b/drivers/gpu/drm/amd/display/dc/optc/Makefile
new file mode 100644
index 000000000000..bb213335fb9f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/optc/Makefile
@@ -0,0 +1,108 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'optc' sub-component of DAL.
+#
+
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+OPTC_DCN10 = dcn10_optc.o
+
+AMD_DAL_OPTC_DCN10 = $(addprefix $(AMDDALPATH)/dc/optc/dcn10/,$(OPTC_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN10)
+
+###############################################################################
+
+OPTC_DCN20 = dcn20_optc.o
+
+AMD_DAL_OPTC_DCN20 = $(addprefix $(AMDDALPATH)/dc/optc/dcn20/,$(OPTC_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN20)
+
+###############################################################################
+
+OPTC_DCN201 = dcn201_optc.o
+
+AMD_DAL_OPTC_DCN201 = $(addprefix $(AMDDALPATH)/dc/optc/dcn201/,$(OPTC_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN201)
+
+###############################################################################
+
+###############################################################################
+
+###############################################################################
+
+OPTC_DCN30 = dcn30_optc.o
+
+AMD_DAL_OPTC_DCN30 = $(addprefix $(AMDDALPATH)/dc/optc/dcn30/,$(OPTC_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN30)
+
+###############################################################################
+
+OPTC_DCN301 = dcn301_optc.o
+
+AMD_DAL_OPTC_DCN301 = $(addprefix $(AMDDALPATH)/dc/optc/dcn301/,$(OPTC_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN301)
+
+###############################################################################
+
+OPTC_DCN31 = dcn31_optc.o
+
+AMD_DAL_OPTC_DCN31 = $(addprefix $(AMDDALPATH)/dc/optc/dcn31/,$(OPTC_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN31)
+
+###############################################################################
+
+OPTC_DCN314 = dcn314_optc.o
+
+AMD_DAL_OPTC_DCN314 = $(addprefix $(AMDDALPATH)/dc/optc/dcn314/,$(OPTC_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN314)
+
+###############################################################################
+
+OPTC_DCN32 = dcn32_optc.o
+
+AMD_DAL_OPTC_DCN32 = $(addprefix $(AMDDALPATH)/dc/optc/dcn32/,$(OPTC_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN32)
+
+###############################################################################
+
+OPTC_DCN35 = dcn35_optc.o
+
+AMD_DAL_OPTC_DCN35 = $(addprefix $(AMDDALPATH)/dc/optc/dcn35/,$(OPTC_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN35)
+
+###############################################################################
+
+###############################################################################
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
index 0e8f4f36c87c..0e8f4f36c87c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
index ab81594a7fad..ab81594a7fad 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c
index 58bdbd859bf9..58bdbd859bf9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h
index f7968b9ca16e..c2e03ced392e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h
@@ -26,7 +26,7 @@
 #ifndef __DC_OPTC_DCN20_H__
 #define __DC_OPTC_DCN20_H__
 
-#include "../dcn10/dcn10_optc.h"
+#include "dcn10/dcn10_optc.h"
 
 #define TG_COMMON_REG_LIST_DCN2_0(inst) \
 	TG_COMMON_REG_LIST_DCN(inst),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c
index 70fcbec03fb6..70fcbec03fb6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h
index e9545b73513a..e9545b73513a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c
index b97bdb868a0e..b97bdb868a0e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h
index d3a056c12b0d..d3a056c12b0d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c
index b3cfcb887905..b3cfcb887905 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h
index b49585682a15..b49585682a15 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c
index 63a677c8ee27..63a677c8ee27 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h
index 30b81a448ce2..30b81a448ce2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c
index 0086cafb0f7a..0086cafb0f7a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h
index 99c098e76116..99c098e76116 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
index a2c4db2cebdd..91ea0d4da06a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
@@ -172,6 +172,13 @@ static bool optc32_disable_crtc(struct timing_generator *optc)
 	REG_UPDATE(OTG_CONTROL,
 			OTG_MASTER_EN, 0);
 
+	REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+			OPTC_SEG0_SRC_SEL, 0xf,
+			OPTC_SEG1_SRC_SEL, 0xf,
+			OPTC_SEG2_SRC_SEL, 0xf,
+			OPTC_SEG3_SRC_SEL, 0xf,
+			OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
 	REG_UPDATE(CONTROL,
 			VTG0_ENABLE, 0);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h
index 8ce3b178cab0..8ce3b178cab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
index a4a39f1638cf..08a59cf449ca 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
@@ -144,6 +144,13 @@ static bool optc35_disable_crtc(struct timing_generator *optc)
 	REG_UPDATE(OTG_CONTROL,
 			OTG_MASTER_EN, 0);
 
+	REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+			OPTC_SEG0_SRC_SEL, 0xf,
+			OPTC_SEG1_SRC_SEL, 0xf,
+			OPTC_SEG2_SRC_SEL, 0xf,
+			OPTC_SEG3_SRC_SEL, 0xf,
+			OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
 	REG_UPDATE(CONTROL,
 			VTG0_ENABLE, 0);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h
index 1f422e4c468f..1f422e4c468f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/resource/Makefile b/drivers/gpu/drm/amd/display/dc/resource/Makefile
new file mode 100644
index 000000000000..0a75ed8962a5
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/Makefile
@@ -0,0 +1,199 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'resource' sub-component of DAL.
+#
+
+
+###############################################################################
+#  DCE
+###############################################################################
+
+RESOURCE_DCE100 = dce100_resource.o
+
+AMD_DAL_RESOURCE_DCE100 = $(addprefix $(AMDDALPATH)/dc/resource/dce100/,$(RESOURCE_DCE100))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE100)
+
+###############################################################################
+
+RESOURCE_DCE110 = dce110_resource.o
+
+AMD_DAL_RESOURCE_DCE110 = $(addprefix $(AMDDALPATH)/dc/resource/dce110/,$(RESOURCE_DCE110))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE110)
+
+###############################################################################
+
+RESOURCE_DCE112 = dce112_resource.o
+
+AMD_DAL_RESOURCE_DCE112 = $(addprefix $(AMDDALPATH)/dc/resource/dce112/,$(RESOURCE_DCE112))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE112)
+
+###############################################################################
+
+RESOURCE_DCE120 = dce120_resource.o
+
+AMD_DAL_RESOURCE_DCE120 = $(addprefix $(AMDDALPATH)/dc/resource/dce120/,$(RESOURCE_DCE120))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE120)
+
+###############################################################################
+
+RESOURCE_DCE80 = dce80_resource.o
+
+AMD_DAL_RESOURCE_DCE80 = $(addprefix $(AMDDALPATH)/dc/resource/dce80/,$(RESOURCE_DCE80))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE80)
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+RESOURCE_DCN10 = dcn10_resource.o
+
+AMD_DAL_RESOURCE_DCN10 = $(addprefix $(AMDDALPATH)/dc/resource/dcn10/,$(RESOURCE_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN10)
+
+###############################################################################
+
+RESOURCE_DCN20 = dcn20_resource.o
+
+AMD_DAL_RESOURCE_DCN20 = $(addprefix $(AMDDALPATH)/dc/resource/dcn20/,$(RESOURCE_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN20)
+
+###############################################################################
+
+RESOURCE_DCN201 = dcn201_resource.o
+
+AMD_DAL_RESOURCE_DCN201 = $(addprefix $(AMDDALPATH)/dc/resource/dcn201/,$(RESOURCE_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN201)
+
+###############################################################################
+
+RESOURCE_DCN21 = dcn21_resource.o
+
+AMD_DAL_RESOURCE_DCN21 = $(addprefix $(AMDDALPATH)/dc/resource/dcn21/,$(RESOURCE_DCN21))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN21)
+
+###############################################################################
+
+###############################################################################
+
+###############################################################################
+
+RESOURCE_DCN30 = dcn30_resource.o
+
+AMD_DAL_RESOURCE_DCN30 = $(addprefix $(AMDDALPATH)/dc/resource/dcn30/,$(RESOURCE_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN30)
+
+###############################################################################
+
+RESOURCE_DCN301 = dcn301_resource.o
+
+AMD_DAL_RESOURCE_DCN301 = $(addprefix $(AMDDALPATH)/dc/resource/dcn301/,$(RESOURCE_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN301)
+
+###############################################################################
+
+RESOURCE_DCN302 = dcn302_resource.o
+
+AMD_DAL_RESOURCE_DCN302 = $(addprefix $(AMDDALPATH)/dc/resource/dcn302/,$(RESOURCE_DCN302))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN302)
+
+###############################################################################
+
+RESOURCE_DCN303 = dcn303_resource.o
+
+AMD_DAL_RESOURCE_DCN303 = $(addprefix $(AMDDALPATH)/dc/resource/dcn303/,$(RESOURCE_DCN303))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN303)
+
+###############################################################################
+
+RESOURCE_DCN31 = dcn31_resource.o
+
+AMD_DAL_RESOURCE_DCN31 = $(addprefix $(AMDDALPATH)/dc/resource/dcn31/,$(RESOURCE_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN31)
+
+###############################################################################
+
+RESOURCE_DCN314 = dcn314_resource.o
+
+AMD_DAL_RESOURCE_DCN314 = $(addprefix $(AMDDALPATH)/dc/resource/dcn314/,$(RESOURCE_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN314)
+
+###############################################################################
+
+RESOURCE_DCN315 = dcn315_resource.o
+
+AMD_DAL_RESOURCE_DCN315 = $(addprefix $(AMDDALPATH)/dc/resource/dcn315/,$(RESOURCE_DCN315))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN315)
+
+###############################################################################
+
+RESOURCE_DCN316 = dcn316_resource.o
+
+AMD_DAL_RESOURCE_DCN316 = $(addprefix $(AMDDALPATH)/dc/resource/dcn316/,$(RESOURCE_DCN316))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN316)
+
+###############################################################################
+
+RESOURCE_DCN32 = dcn32_resource.o
+
+AMD_DAL_RESOURCE_DCN32 = $(addprefix $(AMDDALPATH)/dc/resource/dcn32/,$(RESOURCE_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN32)
+
+###############################################################################
+
+RESOURCE_DCN321 = dcn321_resource.o
+
+AMD_DAL_RESOURCE_DCN321 = $(addprefix $(AMDDALPATH)/dc/resource/dcn321/,$(RESOURCE_DCN321))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN321)
+
+###############################################################################
+
+RESOURCE_DCN35 = dcn35_resource.o
+
+AMD_DAL_RESOURCE_DCN35 = $(addprefix $(AMDDALPATH)/dc/resource/dcn35/,$(RESOURCE_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN35)
+
+###############################################################################
+
+###############################################################################
+
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
index 53a5f4cb648c..53a5f4cb648c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h
index fecab7c560f5..fecab7c560f5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c
index fe518fd27b08..fe518fd27b08 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h
index aa4531e0800e..aa4531e0800e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c
index d1edac46c9a0..d1edac46c9a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h
index 1f57ebc6f9b4..1f57ebc6f9b4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
index 962de79be169..20662edd0ae4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
@@ -36,7 +36,7 @@
 
 #include "dce110/dce110_resource.h"
 #include "virtual/virtual_stream_encoder.h"
-#include "dce120_timing_generator.h"
+#include "dce120/dce120_timing_generator.h"
 #include "irq/dce120/irq_service_dce120.h"
 #include "dce/dce_opp.h"
 #include "dce/dce_clock_source.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h
index 3d1f3cf012f4..3d1f3cf012f4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt b/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt
new file mode 100644
index 000000000000..19dd73bc9ab0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt
@@ -0,0 +1,4 @@
+dal3_subdirectory_sources(
+  dce80_resource.c
+  dce80_resource.h
+  )
+\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
index 35a2cce0c2b8..35a2cce0c2b8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h
index eff31ab83a39..eff31ab83a39 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
index b94c5c97eee7..d08d10969251 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
@@ -26,29 +26,32 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn10_init.h"
+#include "dcn10/dcn10_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
-#include "dcn10_resource.h"
-#include "dcn10_ipp.h"
-#include "dcn10_mpc.h"
+#include "dcn10/dcn10_resource.h"
+#include "dcn10/dcn10_ipp.h"
+#include "dcn10/dcn10_mpc.h"
+
+#include "dcn10/dcn10_dwb.h"
+
 #include "irq/dcn10/irq_service_dcn10.h"
-#include "dcn10_dpp.h"
-#include "dcn10_optc.h"
+#include "dcn10/dcn10_dpp.h"
+#include "dcn10/dcn10_optc.h"
 #include "dcn10/dcn10_hwseq.h"
 #include "dce110/dce110_hwseq.h"
-#include "dcn10_opp.h"
-#include "dcn10_link_encoder.h"
-#include "dcn10_stream_encoder.h"
+#include "dcn10/dcn10_opp.h"
+#include "dcn10/dcn10_link_encoder.h"
+#include "dcn10/dcn10_stream_encoder.h"
 #include "dce/dce_clock_source.h"
 #include "dce/dce_audio.h"
 #include "dce/dce_hwseq.h"
 #include "virtual/virtual_stream_encoder.h"
 #include "dce110/dce110_resource.h"
 #include "dce112/dce112_resource.h"
-#include "dcn10_hubp.h"
-#include "dcn10_hubbub.h"
+#include "dcn10/dcn10_hubp.h"
+#include "dcn10/dcn10_hubbub.h"
 #include "dce/dce_panel_cntl.h"
 
 #include "soc15_hw_ip.h"
@@ -1247,7 +1250,10 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
 			/* Store first available for MST second display
 			 * in daisy chain use case
 			 */
-			j = i;
+
+			if (pool->stream_enc[i]->id != ENGINE_ID_VIRTUAL)
+				j = i;
+
 			if (link->ep_type == DISPLAY_ENDPOINT_PHY && pool->stream_enc[i]->id ==
 					link->link_enc->preferred_engine)
 				return pool->stream_enc[i];
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h
index bf8e33cd8147..bf8e33cd8147 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
index 0a422fbb14bc..f9c5bc624be3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
@@ -29,7 +29,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn20_init.h"
+#include "dcn20/dcn20_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -39,29 +39,29 @@
 
 #include "dcn10/dcn10_hubp.h"
 #include "dcn10/dcn10_ipp.h"
-#include "dcn20_hubbub.h"
-#include "dcn20_mpc.h"
-#include "dcn20_hubp.h"
+#include "dcn20/dcn20_hubbub.h"
+#include "dcn20/dcn20_mpc.h"
+#include "dcn20/dcn20_hubp.h"
 #include "irq/dcn20/irq_service_dcn20.h"
-#include "dcn20_dpp.h"
-#include "dcn20_optc.h"
+#include "dcn20/dcn20_dpp.h"
+#include "dcn20/dcn20_optc.h"
 #include "dcn20/dcn20_hwseq.h"
 #include "dce110/dce110_hwseq.h"
 #include "dcn10/dcn10_resource.h"
-#include "dcn20_opp.h"
+#include "dcn20/dcn20_opp.h"
 
-#include "dcn20_dsc.h"
+#include "dcn20/dcn20_dsc.h"
 
-#include "dcn20_link_encoder.h"
-#include "dcn20_stream_encoder.h"
+#include "dcn20/dcn20_link_encoder.h"
+#include "dcn20/dcn20_stream_encoder.h"
 #include "dce/dce_clock_source.h"
 #include "dce/dce_audio.h"
 #include "dce/dce_hwseq.h"
 #include "virtual/virtual_stream_encoder.h"
 #include "dce110/dce110_resource.h"
 #include "dml/display_mode_vba.h"
-#include "dcn20_dccg.h"
-#include "dcn20_vmid.h"
+#include "dcn20/dcn20_dccg.h"
+#include "dcn20/dcn20_vmid.h"
 #include "dce/dce_panel_cntl.h"
 
 #include "navi10_ip_offset.h"
@@ -1273,15 +1273,19 @@ static void build_clamping_params(struct dc_stream_state *stream)
 	stream->clamping.pixel_encoding = stream->timing.pixel_encoding;
 }
 
-static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx)
+void dcn20_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx)
 {
-
 	get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params);
-
 	pipe_ctx->clock_source->funcs->get_pix_clk_dividers(
-		pipe_ctx->clock_source,
-		&pipe_ctx->stream_res.pix_clk_params,
-		&pipe_ctx->pll_settings);
+			pipe_ctx->clock_source,
+			&pipe_ctx->stream_res.pix_clk_params,
+			&pipe_ctx->pll_settings);
+}
+
+static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx)
+{
+
+	dcn20_build_pipe_pix_clk_params(pipe_ctx);
 
 	pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h
index 37ecaccc5d12..4cee3fa11a7f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h
@@ -165,6 +165,7 @@ enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx,
 enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc, struct dc_state *dc_ctx, struct dc_stream_state *dc_stream);
 enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream);
 enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_state);
+void dcn20_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx);
 
 #endif /* __DC_RESOURCE_DCN20_H__ */
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
index bca22d867696..914b234d7f6b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
@@ -26,7 +26,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn201_init.h"
+#include "dcn201/dcn201_init.h"
 #include "dml/dcn20/dcn20_fpu.h"
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -36,16 +36,16 @@
 
 #include "dcn10/dcn10_hubp.h"
 #include "dcn10/dcn10_ipp.h"
-#include "dcn201_mpc.h"
-#include "dcn201_hubp.h"
+#include "dcn201/dcn201_mpc.h"
+#include "dcn201/dcn201_hubp.h"
 #include "irq/dcn201/irq_service_dcn201.h"
 #include "dcn201/dcn201_dpp.h"
 #include "dcn201/dcn201_hubbub.h"
-#include "dcn201_dccg.h"
-#include "dcn201_optc.h"
+#include "dcn201/dcn201_dccg.h"
+#include "dcn201/dcn201_optc.h"
 #include "dcn201/dcn201_hwseq.h"
 #include "dce110/dce110_hwseq.h"
-#include "dcn201_opp.h"
+#include "dcn201/dcn201_opp.h"
 #include "dcn201/dcn201_link_encoder.h"
 #include "dcn20/dcn20_stream_encoder.h"
 #include "dce/dce_clock_source.h"
@@ -55,7 +55,7 @@
 #include "dce110/dce110_resource.h"
 #include "dce/dce_aux.h"
 #include "dce/dce_i2c.h"
-#include "dcn201_hubbub.h"
+#include "dcn201/dcn201_hubbub.h"
 #include "dcn10/dcn10_resource.h"
 
 #include "cyan_skillfish_ip_offset.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h
index e0467d17d4ae..e0467d17d4ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
index 42277b280586..65d337731f56 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
@@ -29,7 +29,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn21_init.h"
+#include "dcn21/dcn21_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -44,7 +44,7 @@
 #include "dcn20/dcn20_hubbub.h"
 #include "dcn20/dcn20_mpc.h"
 #include "dcn20/dcn20_hubp.h"
-#include "dcn21_hubp.h"
+#include "dcn21/dcn21_hubp.h"
 #include "irq/dcn21/irq_service_dcn21.h"
 #include "dcn20/dcn20_dpp.h"
 #include "dcn20/dcn20_optc.h"
@@ -61,7 +61,7 @@
 #include "dml/display_mode_vba.h"
 #include "dcn20/dcn20_dccg.h"
 #include "dcn21/dcn21_dccg.h"
-#include "dcn21_hubbub.h"
+#include "dcn21/dcn21_hubbub.h"
 #include "dcn10/dcn10_resource.h"
 #include "dce/dce_panel_cntl.h"
 
@@ -713,9 +713,8 @@ static void dcn21_resource_destruct(struct dcn21_resource_pool *pool)
 			pool->base.hubps[i] = NULL;
 		}
 
-		if (pool->base.irqs != NULL) {
+		if (pool->base.irqs != NULL)
 			dal_irq_service_destroy(&pool->base.irqs);
-		}
 	}
 
 	for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h
index f7ecc002c2f7..f7ecc002c2f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
index 7b259cb5f418..37a64186f324 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
@@ -27,7 +27,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn30_init.h"
+#include "dcn30/dcn30_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -1682,6 +1682,7 @@ noinline bool dcn30_internal_validate_bw(
 		 * We don't actually support prefetch mode 2, so require that we
 		 * at least support prefetch mode 1.
 		 */
+		context->bw_ctx.dml.validate_max_state = fast_validate;
 		context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank =
 			dm_allow_self_refresh;
 
@@ -1691,6 +1692,7 @@ noinline bool dcn30_internal_validate_bw(
 			memset(merge, 0, sizeof(merge));
 			vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
 		}
+		context->bw_ctx.dml.validate_max_state = false;
 	}
 
 	dml_log_mode_support_params(&context->bw_ctx.dml);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h
index 8e6b8b7368fd..8e6b8b7368fd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
index f3b75f283aa2..511ff6b5b985 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
@@ -27,7 +27,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn301_init.h"
+#include "dcn301/dcn301_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -61,7 +61,7 @@
 #include "dcn10/dcn10_resource.h"
 #include "dcn30/dcn30_dio_stream_encoder.h"
 #include "dcn301/dcn301_dio_link_encoder.h"
-#include "dcn301_panel_cntl.h"
+#include "dcn301/dcn301_panel_cntl.h"
 
 #include "vangogh_ip_offset.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h
index ae8672680cdd..ae8672680cdd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
index 63ac984a04f7..5791b5cc2875 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
@@ -23,9 +23,9 @@
  *
  */
 
-#include "dcn302_init.h"
+#include "dcn302/dcn302_init.h"
 #include "dcn302_resource.h"
-#include "dcn302_dccg.h"
+#include "dcn302/dcn302_dccg.h"
 #include "irq/dcn302/irq_service_dcn302.h"
 
 #include "dcn30/dcn30_dio_link_encoder.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h
index 9f24e73b92b3..9f24e73b92b3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
index 49cb7fde416a..25cd6236b054 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
@@ -23,9 +23,9 @@
  * Authors: AMD
  */
 
-#include "dcn303_init.h"
+#include "dcn303/dcn303_init.h"
 #include "dcn303_resource.h"
-#include "dcn303_dccg.h"
+#include "dcn303/dcn303_dccg.h"
 #include "irq/dcn303/irq_service_dcn303.h"
 
 #include "dcn30/dcn30_dio_link_encoder.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h
index 37cf1525820b..37cf1525820b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
index 79416cfb22f0..31035fc3d868 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
@@ -70,7 +70,7 @@
 #include "dml/dcn31/dcn31_fpu.h"
 #include "dcn31/dcn31_dccg.h"
 #include "dcn10/dcn10_resource.h"
-#include "dcn31_panel_cntl.h"
+#include "dcn31/dcn31_panel_cntl.h"
 
 #include "dcn30/dcn30_dwb.h"
 #include "dcn30/dcn30_mmhubbub.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
index 901436591ed4..901436591ed4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
index 677361d74a4e..c97391edb5ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
@@ -871,7 +871,7 @@ static const struct dc_plane_cap plane_cap = {
 static const struct dc_debug_options debug_defaults_drv = {
 	.disable_z10 = false,
 	.enable_z9_disable_interface = true,
-	.minimum_z8_residency_time = 2000,
+	.minimum_z8_residency_time = 2100,
 	.psr_skip_crtc_disable = true,
 	.replay_skip_crtc_disabled = true,
 	.disable_dmcu = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h
index 49ffe71018df..49ffe71018df 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
index cb8024eee8e4..515ba435f759 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
@@ -1631,8 +1631,10 @@ static bool allow_pixel_rate_crb(struct dc *dc, struct dc_state *context)
 	int i;
 	struct resource_context *res_ctx = &context->res_ctx;
 
-	/*Don't apply for single stream*/
-	if (context->stream_count < 2)
+	/* Only apply for dual stream scenarios with edp*/
+	if (context->stream_count != 2)
+		return false;
+	if (context->streams[0]->signal != SIGNAL_TYPE_EDP && context->streams[1]->signal != SIGNAL_TYPE_EDP)
 		return false;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h
index 22849eaa6f24..22849eaa6f24 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
index b9753d4606f8..b9753d4606f8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h
index aba6d634131b..aba6d634131b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index 89b072447dba..ac04a9c9a3d8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -27,7 +27,7 @@
 #include "dm_services.h"
 #include "dc.h"
 
-#include "dcn32_init.h"
+#include "dcn32/dcn32_init.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
@@ -41,7 +41,7 @@
 #include "dcn31/dcn31_hubbub.h"
 #include "dcn32/dcn32_hubbub.h"
 #include "dcn32/dcn32_mpc.h"
-#include "dcn32_hubp.h"
+#include "dcn32/dcn32_hubp.h"
 #include "irq/dcn32/irq_service_dcn32.h"
 #include "dcn32/dcn32_dpp.h"
 #include "dcn32/dcn32_optc.h"
@@ -89,6 +89,8 @@
 #include "dcn20/dcn20_vmid.h"
 #include "dml/dcn32/dcn32_fpu.h"
 
+#include "dc_state_priv.h"
+
 #include "dml2/dml2_wrapper.h"
 
 #define DC_LOGGER_INIT(logger)
@@ -1644,7 +1646,7 @@ static void dcn32_enable_phantom_plane(struct dc *dc,
 		if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state)
 			phantom_plane = prev_phantom_plane;
 		else
-			phantom_plane = dc_create_plane_state(dc);
+			phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state);
 
 		memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address));
 		memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality,
@@ -1665,9 +1667,7 @@ static void dcn32_enable_phantom_plane(struct dc *dc,
 		phantom_plane->clip_rect.y = 0;
 		phantom_plane->clip_rect.height = phantom_stream->src.height;
 
-		phantom_plane->is_phantom = true;
-
-		dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context);
+		dc_state_add_phantom_plane(dc, phantom_stream, phantom_plane, context);
 
 		curr_pipe = curr_pipe->bottom_pipe;
 		prev_phantom_plane = phantom_plane;
@@ -1683,13 +1683,7 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc,
 	struct dc_stream_state *phantom_stream = NULL;
 	struct pipe_ctx *ref_pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx];
 
-	phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink);
-	phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
-	phantom_stream->dpms_off = true;
-	phantom_stream->mall_stream_config.type = SUBVP_PHANTOM;
-	phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream;
-	ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN;
-	ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream;
+	phantom_stream = dc_state_create_phantom_stream(dc, context, ref_pipe->stream);
 
 	/* stream has limited viewport and small timing */
 	memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing));
@@ -1699,81 +1693,10 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc,
 	dcn32_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream, pipes, pipe_cnt, dc_pipe_idx);
 	DC_FP_END();
 
-	dc_add_stream_to_ctx(dc, context, phantom_stream);
+	dc_state_add_phantom_stream(dc, context, phantom_stream, ref_pipe->stream);
 	return phantom_stream;
 }
 
-void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context)
-{
-	int i;
-	struct dc_plane_state *phantom_plane = NULL;
-	struct dc_stream_state *phantom_stream = NULL;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (resource_is_pipe_type(pipe, OTG_MASTER) &&
-				resource_is_pipe_type(pipe, DPP_PIPE) &&
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-			phantom_plane = pipe->plane_state;
-			phantom_stream = pipe->stream;
-
-			dc_plane_state_retain(phantom_plane);
-			dc_stream_retain(phantom_stream);
-		}
-	}
-}
-
-// return true if removed piped from ctx, false otherwise
-bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update)
-{
-	int i;
-	bool removed_pipe = false;
-	struct dc_plane_state *phantom_plane = NULL;
-	struct dc_stream_state *phantom_stream = NULL;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-		// build scaling params for phantom pipes
-		if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-			phantom_plane = pipe->plane_state;
-			phantom_stream = pipe->stream;
-
-			dc_rem_all_planes_for_stream(dc, pipe->stream, context);
-			dc_remove_stream_from_ctx(dc, context, pipe->stream);
-
-			/* Ref count is incremented on allocation and also when added to the context.
-			 * Therefore we must call release for the the phantom plane and stream once
-			 * they are removed from the ctx to finally decrement the refcount to 0 to free.
-			 */
-			dc_plane_state_release(phantom_plane);
-			dc_stream_release(phantom_stream);
-
-			removed_pipe = true;
-		}
-
-		/* For non-full updates, a shallow copy of the current state
-		 * is created. In this case we don't want to erase the current
-		 * state (there can be 2 HIRQL threads, one in flip, and one in
-		 * checkMPO) that can cause a race condition.
-		 *
-		 * This is just a workaround, needs a proper fix.
-		 */
-		if (!fast_update) {
-			// Clear all phantom stream info
-			if (pipe->stream) {
-				pipe->stream->mall_stream_config.type = SUBVP_NONE;
-				pipe->stream->mall_stream_config.paired_stream = NULL;
-			}
-
-			if (pipe->plane_state) {
-				pipe->plane_state->is_phantom = false;
-			}
-		}
-	}
-	return removed_pipe;
-}
-
 /* TODO: Input to this function should indicate which pipe indexes (or streams)
  * require a phantom pipe / stream
  */
@@ -1798,7 +1721,7 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context,
 		// We determine which phantom pipes were added by comparing with
 		// the phantom stream.
 		if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream &&
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
 			pipe->stream->use_dynamic_meta = false;
 			pipe->plane_state->flip_immediate = false;
 			if (!resource_build_scaling_params(pipe)) {
@@ -1817,7 +1740,6 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_val
 	int vlevel = 0;
 	int pipe_cnt = 0;
 	display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
-	struct mall_temp_config mall_temp_config;
 
 	/* To handle Freesync properly, setting FreeSync DML parameters
 	 * to its default state for the first stage of validation
@@ -1827,29 +1749,12 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_val
 
 	DC_LOGGER_INIT(dc->ctx->logger);
 
-	/* For fast validation, there are situations where a shallow copy of
-	 * of the dc->current_state is created for the validation. In this case
-	 * we want to save and restore the mall config because we always
-	 * teardown subvp at the beginning of validation (and don't attempt
-	 * to add it back if it's fast validation). If we don't restore the
-	 * subvp config in cases of fast validation + shallow copy of the
-	 * dc->current_state, the dc->current_state will have a partially
-	 * removed subvp state when we did not intend to remove it.
-	 */
-	if (fast_validate) {
-		memset(&mall_temp_config, 0, sizeof(mall_temp_config));
-		dcn32_save_mall_state(dc, context, &mall_temp_config);
-	}
-
 	BW_VAL_TRACE_COUNT();
 
 	DC_FP_START();
 	out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
 	DC_FP_END();
 
-	if (fast_validate)
-		dcn32_restore_mall_state(dc, context, &mall_temp_config);
-
 	if (pipe_cnt == 0)
 		goto validate_out;
 
@@ -1933,7 +1838,7 @@ int dcn32_populate_dml_pipes_from_context(
 		 * This is just a workaround -- needs a proper fix.
 		 */
 		if (!fast_validate) {
-			switch (pipe->stream->mall_stream_config.type) {
+			switch (dc_state_get_pipe_subvp_type(context, pipe)) {
 			case SUBVP_MAIN:
 				pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport;
 				subvp_in_use = true;
@@ -1994,7 +1899,7 @@ int dcn32_populate_dml_pipes_from_context(
 
 static struct dc_cap_funcs cap_funcs = {
 	.get_dcc_compression_cap = dcn20_get_dcc_compression_cap,
-	.get_subvp_en = dcn32_subvp_in_use,
+	.get_subvp_en = resource_subvp_in_use,
 };
 
 void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context,
@@ -2037,10 +1942,7 @@ static struct resource_funcs dcn32_res_pool_funcs = {
 	.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
 	.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
 	.add_phantom_pipes = dcn32_add_phantom_pipes,
-	.remove_phantom_pipes = dcn32_remove_phantom_pipes,
-	.retain_phantom_pipes = dcn32_retain_phantom_pipes,
-	.save_mall_state = dcn32_save_mall_state,
-	.restore_mall_state = dcn32_restore_mall_state,
+	.build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params,
 };
 
 static uint32_t read_pipe_fuses(struct dc_context *ctx)
@@ -2453,16 +2355,19 @@ static bool dcn32_resource_construct(
 	dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head;
 
 	dc->dml2_options.svp_pstate.callbacks.dc = dc;
-	dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context;
-	dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx;
+	dc->dml2_options.svp_pstate.callbacks.add_phantom_plane = &dc_state_add_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.add_phantom_stream = &dc_state_add_phantom_stream;
 	dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params;
-	dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state;
-	dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context;
-	dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx;
-	dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink;
-	dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release;
-	dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release;
+	dc->dml2_options.svp_pstate.callbacks.create_phantom_plane = &dc_state_create_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.remove_phantom_plane = &dc_state_remove_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.remove_phantom_stream = &dc_state_remove_phantom_stream;
+	dc->dml2_options.svp_pstate.callbacks.create_phantom_stream = &dc_state_create_phantom_stream;
+	dc->dml2_options.svp_pstate.callbacks.release_phantom_plane = &dc_state_release_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.release_phantom_stream = &dc_state_release_phantom_stream;
 	dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc;
+	dc->dml2_options.svp_pstate.callbacks.get_pipe_subvp_type = &dc_state_get_pipe_subvp_type;
+	dc->dml2_options.svp_pstate.callbacks.get_stream_subvp_type = &dc_state_get_stream_subvp_type;
+	dc->dml2_options.svp_pstate.callbacks.get_paired_subvp_stream = &dc_state_get_paired_subvp_stream;
 
 	dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us;
 	dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
index b931008114c9..62611acd4bcb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
@@ -39,6 +39,7 @@
 #define DCN3_2_MBLK_HEIGHT_8BPE 64
 #define DCN3_2_DCFCLK_DS_INIT_KHZ 10000 // Choose 10Mhz for init DCFCLK DS freq
 #define SUBVP_HIGH_REFRESH_LIST_LEN 4
+#define SUBVP_ACTIVE_MARGIN_LIST_LEN 2
 #define DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ 1800
 #define DCN3_2_VMIN_DISPCLK_HZ 717000000
 
@@ -57,6 +58,15 @@ struct subvp_high_refresh_list {
 	} res[SUBVP_HIGH_REFRESH_LIST_LEN];
 };
 
+struct subvp_active_margin_list {
+	int min_refresh;
+	int max_refresh;
+	struct {
+		int width;
+		int height;
+	} res[SUBVP_ACTIVE_MARGIN_LIST_LEN];
+};
+
 struct dcn32_resource_pool {
 	struct resource_pool base;
 };
@@ -81,12 +91,6 @@ bool dcn32_release_post_bldn_3dlut(
 		struct dc_3dlut **lut,
 		struct dc_transfer_func **shaper);
 
-bool dcn32_remove_phantom_pipes(struct dc *dc,
-		struct dc_state *context, bool fast_update);
-
-void dcn32_retain_phantom_pipes(struct dc *dc,
-		struct dc_state *context);
-
 void dcn32_add_phantom_pipes(struct dc *dc,
 		struct dc_state *context,
 		display_e2e_pipe_params_st *pipes,
@@ -127,9 +131,6 @@ void dcn32_merge_pipes_for_subvp(struct dc *dc,
 bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
 		struct dc_state *context);
 
-bool dcn32_subvp_in_use(struct dc *dc,
-		struct dc_state *context);
-
 bool dcn32_mpo_in_use(struct dc_state *context);
 
 bool dcn32_any_surfaces_rotated(struct dc *dc, struct dc_state *context);
@@ -159,15 +160,7 @@ void dcn32_determine_det_override(struct dc *dc,
 void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context,
 	display_e2e_pipe_params_st *pipes);
 
-void dcn32_save_mall_state(struct dc *dc,
-		struct dc_state *context,
-		struct mall_temp_config *temp_config);
-
-void dcn32_restore_mall_state(struct dc *dc,
-		struct dc_state *context,
-		struct mall_temp_config *temp_config);
-
-struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context);
+struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);
 
 bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe);
 
@@ -183,6 +176,8 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context);
 
 bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int vlevel);
 
+void dcn32_update_dml_pipes_odm_policy_based_on_context(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes);
+
 /* definitions for run time init of reg offsets */
 
 /* CLK SRC */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index f7de3eca1225..e1ab207c46f1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -63,7 +63,7 @@
 #include "dcn31/dcn31_apg.h"
 #include "dcn31/dcn31_dio_link_encoder.h"
 #include "dcn32/dcn32_dio_link_encoder.h"
-#include "dcn321_dio_link_encoder.h"
+#include "dcn321/dcn321_dio_link_encoder.h"
 #include "dce/dce_clock_source.h"
 #include "dce/dce_audio.h"
 #include "dce/dce_hwseq.h"
@@ -92,6 +92,8 @@
 #include "vm_helper.h"
 #include "dcn20/dcn20_vmid.h"
 
+#include "dc_state_priv.h"
+
 #define DC_LOGGER_INIT(logger)
 
 enum dcn321_clk_src_array_id {
@@ -1572,7 +1574,7 @@ static void dcn321_destroy_resource_pool(struct resource_pool **pool)
 
 static struct dc_cap_funcs cap_funcs = {
 	.get_dcc_compression_cap = dcn20_get_dcc_compression_cap,
-	.get_subvp_en = dcn32_subvp_in_use,
+	.get_subvp_en = resource_subvp_in_use,
 };
 
 static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
@@ -1605,10 +1607,7 @@ static struct resource_funcs dcn321_res_pool_funcs = {
 	.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
 	.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
 	.add_phantom_pipes = dcn32_add_phantom_pipes,
-	.remove_phantom_pipes = dcn32_remove_phantom_pipes,
-	.retain_phantom_pipes = dcn32_retain_phantom_pipes,
-	.save_mall_state = dcn32_save_mall_state,
-	.restore_mall_state = dcn32_restore_mall_state,
+	.build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params,
 };
 
 static uint32_t read_pipe_fuses(struct dc_context *ctx)
@@ -2007,16 +2006,19 @@ static bool dcn321_resource_construct(
 	dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head;
 
 	dc->dml2_options.svp_pstate.callbacks.dc = dc;
-	dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context;
-	dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx;
+	dc->dml2_options.svp_pstate.callbacks.add_phantom_plane = &dc_state_add_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.add_phantom_stream = &dc_state_add_phantom_stream;
 	dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params;
-	dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state;
-	dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context;
-	dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx;
-	dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink;
-	dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release;
-	dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release;
+	dc->dml2_options.svp_pstate.callbacks.create_phantom_plane = &dc_state_create_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.remove_phantom_plane = &dc_state_remove_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.remove_phantom_stream = &dc_state_remove_phantom_stream;
+	dc->dml2_options.svp_pstate.callbacks.create_phantom_stream = &dc_state_create_phantom_stream;
+	dc->dml2_options.svp_pstate.callbacks.release_phantom_plane = &dc_state_release_phantom_plane;
+	dc->dml2_options.svp_pstate.callbacks.release_phantom_stream = &dc_state_release_phantom_stream;
 	dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc;
+	dc->dml2_options.svp_pstate.callbacks.get_pipe_subvp_type = &dc_state_get_pipe_subvp_type;
+	dc->dml2_options.svp_pstate.callbacks.get_stream_subvp_type = &dc_state_get_stream_subvp_type;
+	dc->dml2_options.svp_pstate.callbacks.get_paired_subvp_stream = &dc_state_get_paired_subvp_stream;
 
 	dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us;
 	dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h
index 82cbf009f2d3..82cbf009f2d3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index c7e011d26d41..761ec9891875 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -78,7 +78,7 @@
 #include "dcn10/dcn10_resource.h"
 #include "dcn31/dcn31_panel_cntl.h"
 #include "dcn35/dcn35_hwseq.h"
-#include "dcn35_dio_link_encoder.h"
+#include "dcn35/dcn35_dio_link_encoder.h"
 #include "dml/dcn31/dcn31_fpu.h" /*todo*/
 #include "dml/dcn35/dcn35_fpu.h"
 #include "dcn35/dcn35_dwb.h"
@@ -96,12 +96,15 @@
 #include "reg_helper.h"
 #include "dce/dmub_abm.h"
 #include "dce/dmub_psr.h"
+#include "dce/dmub_replay.h"
 #include "dce/dce_aux.h"
 #include "dce/dce_i2c.h"
 #include "dml/dcn31/display_mode_vba_31.h" /*temp*/
 #include "vm_helper.h"
 #include "dcn20/dcn20_vmid.h"
 
+#include "dc_state_priv.h"
+
 #include "link_enc_cfg.h"
 #define DC_LOGGER_INIT(logger)
 
@@ -626,7 +629,19 @@ static struct dce_hwseq_registers hwseq_reg;
 	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
 	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
 	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
-	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh)
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK0_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK1_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK2_GATE_DISABLE, mask_sh),\
+	HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK3_GATE_DISABLE, mask_sh)
 
 static const struct dce_hwseq_shift hwseq_shift = {
 		HWSEQ_DCN35_MASK_SH_LIST(__SHIFT)
@@ -705,7 +720,9 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.disable_dcc = DCC_ENABLE,
 	.disable_dpp_power_gate = true,
 	.disable_hubp_power_gate = true,
-	.disable_clock_gate = true,
+	.disable_optc_power_gate = true, /*should the same as above two*/
+	.disable_hpo_power_gate = true, /*dmubfw force domain25 on*/
+	.disable_clock_gate = false,
 	.disable_dsc_power_gate = true,
 	.vsr_support = true,
 	.performance_trace = false,
@@ -724,7 +741,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 			.i2c = true,
 			.dmcu = false, // This is previously known to cause hang on S3 cycles if enabled
 			.dscl = true,
-			.cm = false,
+			.cm = true,
 			.mpc = true,
 			.optc = true,
 			.vpg = true,
@@ -752,7 +769,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.enable_hpo_pg_support = false,
 	.enable_legacy_fast_update = true,
 	.enable_single_display_2to1_odm_policy = false,
-	.disable_idle_power_optimizations = true,
+	.disable_idle_power_optimizations = false,
 	.dmcub_emulation = false,
 	.disable_boot_optimizations = false,
 	.disable_unbounded_requesting = false,
@@ -764,13 +781,15 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.ignore_pg = true,
 	.psp_disabled_wa = true,
 	.ips2_eval_delay_us = 200,
-	.ips2_entry_delay_us = 400
+	.ips2_entry_delay_us = 400,
+	.static_screen_wait_frames = 2,
 };
 
 static const struct dc_panel_config panel_config_defaults = {
 	.psr = {
 		.disable_psr = false,
 		.disallow_psrsu = false,
+		.disallow_replay = false,
 	},
 	.ilr = {
 		.optimize_edp_link_rate = true,
@@ -1529,6 +1548,9 @@ static void dcn35_resource_destruct(struct dcn35_resource_pool *pool)
 	if (pool->base.psr != NULL)
 		dmub_psr_destroy(&pool->base.psr);
 
+	if (pool->base.replay != NULL)
+		dmub_replay_destroy(&pool->base.replay);
+
 	if (pool->base.pg_cntl != NULL)
 		dcn_pg_cntl_destroy(&pool->base.pg_cntl);
 
@@ -1712,6 +1734,13 @@ static bool dcn35_validate_bandwidth(struct dc *dc,
 
 	out = dml2_validate(dc, context, fast_validate);
 
+	if (fast_validate)
+		return out;
+
+	DC_FP_START();
+	dcn35_decide_zstate_support(dc, context);
+	DC_FP_END();
+
 	return out;
 }
 
@@ -1857,7 +1886,7 @@ static bool dcn35_resource_construct(
 
 	/* Use pipe context based otg sync logic */
 	dc->config.use_pipe_ctx_sync_logic = true;
-	dc->config.use_default_clock_table = false;
+
 	/* read VBIOS LTTPR caps */
 	{
 		if (ctx->dc_bios->funcs->get_lttpr_caps) {
@@ -2006,6 +2035,14 @@ static bool dcn35_resource_construct(
 		goto create_fail;
 	}
 
+	/* Replay */
+	pool->base.replay = dmub_replay_create(ctx);
+	if (pool->base.replay == NULL) {
+		dm_error("DC: failed to create replay obj!\n");
+		BREAK_TO_DEBUGGER();
+		goto create_fail;
+	}
+
 	/* ABM */
 	for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
 		pool->base.multiple_abms[i] = dmub_abm_create(ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
index 99aea102e3f7..a51c4a9eaafe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
@@ -166,6 +166,7 @@ struct resource_pool *dcn35_create_resource_pool(
 	SR(MMHUBBUB_MEM_PWR_CNTL), \
 	SR(DCCG_GATE_DISABLE_CNTL), \
 	SR(DCCG_GATE_DISABLE_CNTL2), \
+	SR(DCCG_GATE_DISABLE_CNTL4), \
 	SR(DCCG_GATE_DISABLE_CNTL5), \
 	SR(DCFCLK_CNTL),\
 	SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \
diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index df63aa8f01e9..c78c9224ab60 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -86,6 +86,7 @@ enum dmub_status {
 	DMUB_STATUS_TIMEOUT,
 	DMUB_STATUS_INVALID,
 	DMUB_STATUS_HW_FAILURE,
+	DMUB_STATUS_POWER_STATE_D3
 };
 
 /* enum dmub_asic - dmub asic identifier */
@@ -150,6 +151,13 @@ enum dmub_memory_access_type {
 	DMUB_MEMORY_ACCESS_DMA
 };
 
+/* enum dmub_power_state type - to track DC power state in dmub_srv */
+enum dmub_srv_power_state_type {
+	DMUB_POWER_STATE_UNDEFINED = 0,
+	DMUB_POWER_STATE_D0 = 1,
+	DMUB_POWER_STATE_D3 = 8
+};
+
 /**
  * struct dmub_region - dmub hw memory region
  * @base: base address for region, must be 256 byte aligned
@@ -485,6 +493,8 @@ struct dmub_srv {
 	/* Feature capabilities reported by fw */
 	struct dmub_feature_caps feature_caps;
 	struct dmub_visual_confirm_color visual_confirm_color;
+
+	enum dmub_srv_power_state_type power_state;
 };
 
 /**
@@ -889,6 +899,18 @@ enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub);
  */
 void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index);
 
+/**
+ * dmub_srv_set_power_state() - Track DC power state in dmub_srv
+ * @dmub: The dmub service
+ * @power_state: DC power state setting
+ *
+ * Store DC power state in dmub_srv.  If dmub_srv is in D3, then don't send messages to DMUB
+ *
+ * Return:
+ *   void
+ */
+void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state);
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index ed4379c04715..c64b6c848ef7 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -185,8 +185,7 @@ union abm_flags {
 		unsigned int disable_abm_requested : 1;
 
 		/**
-		 * @disable_abm_immediately: Indicates if driver has requested ABM to be disabled
-		 * immediately.
+		 * @disable_abm_immediately: Indicates if driver has requested ABM to be disabled immediately.
 		 */
 		unsigned int disable_abm_immediately : 1;
 
@@ -654,7 +653,7 @@ union dmub_fw_boot_options {
 		uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/
 		uint32_t usb4_cm_version: 1; /**< 1 CM support */
 		uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */
-		uint32_t usb4_dpia_bw_alloc_supported: 1; /* 1 if USB4 dpia BW allocation supported */
+		uint32_t reserved0: 1;
 		uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/
 		uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */
 		uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/
@@ -818,18 +817,61 @@ enum dmub_gpint_command {
 	 * RETURN: Lower 32-bit mask.
 	 */
 	DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK = 101,
+
 	/**
-	 * DESC: Updates the trace buffer lower 32-bit mask.
+	 * DESC: Updates the trace buffer mask bit0~bit15.
 	 * ARGS: The new mask
 	 * RETURN: Lower 32-bit mask.
 	 */
 	DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0 = 102,
+
 	/**
-	 * DESC: Updates the trace buffer mask bi0~bit15.
+	 * DESC: Updates the trace buffer mask bit16~bit31.
 	 * ARGS: The new mask
 	 * RETURN: Lower 32-bit mask.
 	 */
 	DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1 = 103,
+
+	/**
+	 * DESC: Updates the trace buffer mask bit32~bit47.
+	 * ARGS: The new mask
+	 * RETURN: Lower 32-bit mask.
+	 */
+	DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2 = 114,
+
+	/**
+	 * DESC: Updates the trace buffer mask bit48~bit63.
+	 * ARGS: The new mask
+	 * RETURN: Lower 32-bit mask.
+	 */
+	DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3 = 115,
+
+	/**
+	 * DESC: Read the trace buffer mask bi0~bit15.
+	 */
+	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0 = 116,
+
+	/**
+	 * DESC: Read the trace buffer mask bit16~bit31.
+	 */
+	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD1 = 117,
+
+	/**
+	 * DESC: Read the trace buffer mask bi32~bit47.
+	 */
+	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD2 = 118,
+
+	/**
+	 * DESC: Updates the trace buffer mask bit32~bit63.
+	 */
+	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD3 = 119,
+
+	/**
+	 * DESC: Enable measurements for various task duration
+	 * ARGS: 0 - Disable measurement
+	 *       1 - Enable measurement
+	 */
+	DMUB_GPINT__TRACE_DMUB_WAKE_ACTIVITY = 123,
 };
 
 /**
@@ -1303,6 +1345,10 @@ enum dmub_cmd_cab_type {
 	 * Fit surfaces in CAB (i.e. CAB enable)
 	 */
 	DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB = 2,
+	/**
+	 * Do not fit surfaces in CAB (i.e. no CAB)
+	 */
+	DMUB_CMD__CAB_DCN_SS_NOT_FIT_IN_CAB = 3,
 };
 
 /**
@@ -2840,6 +2886,14 @@ enum dmub_cmd_replay_type {
 	 * Set power opt and coasting vtotal.
 	 */
 	DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL	= 4,
+	/**
+	 * Set disabled iiming sync.
+	 */
+	DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED	= 5,
+	/**
+	 * Set Residency Frameupdate Timer.
+	 */
+	DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER = 6,
 };
 
 /**
@@ -3003,6 +3057,26 @@ struct dmub_cmd_replay_set_power_opt_data {
 };
 
 /**
+ * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command.
+ */
+struct dmub_cmd_replay_set_timing_sync_data {
+	/**
+	 * Panel Instance.
+	 * Panel isntance to identify which replay_state to use
+	 * Currently the support is only for 0 or 1
+	 */
+	uint8_t panel_inst;
+	/**
+	 * REPLAY set_timing_sync
+	 */
+	uint8_t timing_sync_supported;
+	/**
+	 * Explicit padding to 4 byte boundary.
+	 */
+	uint8_t pad[2];
+};
+
+/**
  * Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command.
  */
 struct dmub_rb_cmd_replay_set_power_opt {
@@ -3069,6 +3143,73 @@ struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal {
 };
 
 /**
+ * Definition of a DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command.
+ */
+struct dmub_rb_cmd_replay_set_timing_sync {
+	/**
+	 * Command header.
+	 */
+	struct dmub_cmd_header header;
+	/**
+	 * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command.
+	 */
+	struct dmub_cmd_replay_set_timing_sync_data replay_set_timing_sync_data;
+};
+
+/**
+ * Data passed from driver to FW in  DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command.
+ */
+struct dmub_cmd_replay_frameupdate_timer_data {
+	/**
+	 * Panel Instance.
+	 * Panel isntance to identify which replay_state to use
+	 * Currently the support is only for 0 or 1
+	 */
+	uint8_t panel_inst;
+	/**
+	 * Replay Frameupdate Timer Enable or not
+	 */
+	uint8_t enable;
+	/**
+	 * REPLAY force reflash frame update number
+	 */
+	uint16_t frameupdate_count;
+};
+/**
+ * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER
+ */
+struct dmub_rb_cmd_replay_set_frameupdate_timer {
+	/**
+	 * Command header.
+	 */
+	struct dmub_cmd_header header;
+	/**
+	 * Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command.
+	 */
+	struct dmub_cmd_replay_frameupdate_timer_data data;
+};
+
+/**
+ * Definition union of replay command set
+ */
+union dmub_replay_cmd_set {
+	/**
+	 * Panel Instance.
+	 * Panel isntance to identify which replay_state to use
+	 * Currently the support is only for 0 or 1
+	 */
+	uint8_t panel_inst;
+	/**
+	 * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command data.
+	 */
+	struct dmub_cmd_replay_set_timing_sync_data sync_data;
+	/**
+	 * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command data.
+	 */
+	struct dmub_cmd_replay_frameupdate_timer_data timer_data;
+};
+
+/**
  * Set of HW components that can be locked.
  *
  * Note: If updating with more HW components, fields
@@ -3357,6 +3498,16 @@ struct dmub_cmd_abm_set_pipe_data {
 	 * TODO: Remove.
 	 */
 	uint8_t ramping_boundary;
+
+	/**
+	 * PwrSeq HW Instance.
+	 */
+	uint8_t pwrseq_inst;
+
+	/**
+	 * Explicit padding to 4 byte boundary.
+	 */
+	uint8_t pad[3];
 };
 
 /**
@@ -3737,7 +3888,7 @@ enum dmub_cmd_panel_cntl_type {
  * struct dmub_cmd_panel_cntl_data - Panel control data.
  */
 struct dmub_cmd_panel_cntl_data {
-	uint32_t inst; /**< panel instance */
+	uint32_t pwrseq_inst; /**< pwrseq instance */
 	uint32_t current_backlight; /* in/out */
 	uint32_t bl_pwm_cntl; /* in/out */
 	uint32_t bl_pwm_period_cntl; /* in/out */
@@ -3796,7 +3947,7 @@ struct dmub_cmd_lvtma_control_data {
 	uint8_t uc_pwr_action; /**< LVTMA_ACTION */
 	uint8_t bypass_panel_control_wait;
 	uint8_t reserved_0[2]; /**< For future use */
-	uint8_t panel_inst; /**< LVTMA control instance */
+	uint8_t pwrseq_inst; /**< LVTMA control instance */
 	uint8_t reserved_1[3]; /**< For future use */
 };
 
@@ -4201,6 +4352,12 @@ union dmub_rb_cmd {
 	 * Definition of a DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL command.
 	 */
 	struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal replay_set_power_opt_and_coasting_vtotal;
+
+	struct dmub_rb_cmd_replay_set_timing_sync replay_set_timing_sync;
+	/**
+	 * Definition of a DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command.
+	 */
+	struct dmub_rb_cmd_replay_set_frameupdate_timer replay_set_frameupdate_timer;
 };
 
 /**
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index 22fc4ba96def..9ad738805320 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -64,7 +64,7 @@
 
 
 /* Default scratch mem size. */
-#define DMUB_SCRATCH_MEM_SIZE (256)
+#define DMUB_SCRATCH_MEM_SIZE (1024)
 
 /* Number of windows in use. */
 #define DMUB_NUM_WINDOWS (DMUB_WINDOW_TOTAL)
@@ -713,6 +713,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
 		dmub->hw_funcs.reset_release(dmub);
 
 	dmub->hw_init = true;
+	dmub->power_state = DMUB_POWER_STATE_D0;
 
 	return DMUB_STATUS_OK;
 }
@@ -766,6 +767,9 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
 	if (!dmub->hw_init)
 		return DMUB_STATUS_INVALID;
 
+	if (dmub->power_state != DMUB_POWER_STATE_D0)
+		return DMUB_STATUS_POWER_STATE_D3;
+
 	if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity ||
 	    dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) {
 		return DMUB_STATUS_HW_FAILURE;
@@ -784,6 +788,9 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub)
 	if (!dmub->hw_init)
 		return DMUB_STATUS_INVALID;
 
+	if (dmub->power_state != DMUB_POWER_STATE_D0)
+		return DMUB_STATUS_POWER_STATE_D3;
+
 	/**
 	 * Read back all the queued commands to ensure that they've
 	 * been flushed to framebuffer memory. Otherwise DMCUB might
@@ -1077,6 +1084,7 @@ enum dmub_status dmub_srv_wait_for_inbox0_ack(struct dmub_srv *dmub, uint32_t ti
 		ack = dmub->hw_funcs.read_inbox0_ack_register(dmub);
 		if (ack)
 			return DMUB_STATUS_OK;
+		udelay(1);
 	}
 	return DMUB_STATUS_TIMEOUT;
 }
@@ -1099,3 +1107,11 @@ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_
 				subvp_index);
 	}
 }
+
+void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state)
+{
+	if (!dmub || !dmub->hw_init)
+		return;
+
+	dmub->power_state = dmub_srv_power_state;
+}
diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
index bc96d0211360..813463ffe15c 100644
--- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
+++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
@@ -417,6 +417,8 @@ struct integrated_info {
 	/* V2.1 */
 	struct edp_info edp1_info;
 	struct edp_info edp2_info;
+	uint32_t gpuclk_ss_percentage;
+	uint32_t gpuclk_ss_type;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h b/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h
index 42229b4effdc..eced9ad91f1d 100644
--- a/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h
+++ b/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h
@@ -69,6 +69,11 @@ enum hdcp_message_id {
 	HDCP_MESSAGE_ID_READ_RXSTATUS,
 	HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE,
 
+	/* PS175 chip */
+
+	HDCP_MESSAGE_ID_WRITE_PS175_CMD,
+	HDCP_MESSAGE_ID_READ_PS175_RSP,
+
 	HDCP_MESSAGE_ID_MAX
 };
 
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index ccecddafeb05..3955b7e4b2e2 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -81,6 +81,7 @@ fail_alloc_context:
 void mod_freesync_destroy(struct mod_freesync *mod_freesync)
 {
 	struct core_freesync *core_freesync = NULL;
+
 	if (mod_freesync == NULL)
 		return;
 	core_freesync = MOD_FREESYNC_TO_CORE(mod_freesync);
@@ -278,9 +279,8 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
 		}
 	} else if (last_render_time_in_us > (max_render_time_in_us + in_out_vrr->btr.margin_in_us / 2)) {
 		/* Enter Below the Range */
-		if (!in_out_vrr->btr.btr_active) {
+		if (!in_out_vrr->btr.btr_active)
 			in_out_vrr->btr.btr_active = true;
-		}
 	}
 
 	/* BTR set to "not active" so disengage */
@@ -693,10 +693,12 @@ static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf,
 	if (app_tf != TRANSFER_FUNC_UNKNOWN) {
 		infopacket->valid = true;
 
-		if (app_tf != TRANSFER_FUNC_PQ2084) {
+		if (app_tf == TRANSFER_FUNC_PQ2084)
+			infopacket->sb[9] |= 0x20; // PB9 = [Bit 5 = PQ EOTF Active]
+		else {
 			infopacket->sb[6] |= 0x08;  // PB6 = [Bit 3 = Native Color Active]
 			if (app_tf == TRANSFER_FUNC_GAMMA_22)
-				infopacket->sb[9] |= 0x04;  // PB6 = [Bit 2 = Gamma 2.2 EOTF Active]
+				infopacket->sb[9] |= 0x04;  // PB9 = [Bit 2 = Gamma 2.2 EOTF Active]
 		}
 	}
 }
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
index 1ddb4f5eac8e..182e7532dda8 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
@@ -63,6 +63,7 @@ static inline enum mod_hdcp_status check_hdcp_capable_dp(struct mod_hdcp *hdcp)
 static inline enum mod_hdcp_status check_r0p_available_dp(struct mod_hdcp *hdcp)
 {
 	enum mod_hdcp_status status;
+
 	if (is_dp_hdcp(hdcp)) {
 		status = (hdcp->auth.msg.hdcp1.bstatus &
 				DP_BSTATUS_R0_PRIME_READY) ?
@@ -131,9 +132,8 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
 static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
 {
 	/* Avoid device count == 0 to do authentication */
-	if (0 == get_device_count(hdcp)) {
+	if (get_device_count(hdcp) == 0)
 		return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE;
-	}
 
 	/* Some MST display may choose to report the internal panel as an HDCP RX.
 	 * To update this condition with 1(because the immediate repeater's internal
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
index 91c22b96ebde..733f22bed021 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
@@ -208,9 +208,8 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
 static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
 {
 	/* Avoid device count == 0 to do authentication */
-	if (0 == get_device_count(hdcp)) {
+	if (get_device_count(hdcp) == 0)
 		return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE;
-	}
 
 	/* Some MST display may choose to report the internal panel as an HDCP RX.   */
 	/* To update this condition with 1(because the immediate repeater's internal */
@@ -689,9 +688,8 @@ static enum mod_hdcp_status validate_stream_ready(struct mod_hdcp *hdcp,
 	if (is_hdmi_dvi_sl_hdcp(hdcp)) {
 		if (!process_rxstatus(hdcp, event_ctx, input, &status))
 			goto out;
-		if (event_ctx->rx_id_list_ready) {
+		if (event_ctx->rx_id_list_ready)
 			goto out;
-		}
 	}
 	if (is_hdmi_dvi_sl_hdcp(hdcp))
 		if (!mod_hdcp_execute_and_set(check_stream_ready_available,
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
index c62df3bcc7cb..1d83c1b9da10 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
@@ -86,10 +86,12 @@
 #define HDCP_CPIRQ_TRACE(hdcp) \
 		HDCP_LOG_FSM(hdcp, "[Link %d] --> CPIRQ", hdcp->config.index)
 #define HDCP_EVENT_TRACE(hdcp, event) \
-		if (event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) \
-			HDCP_TIMEOUT_TRACE(hdcp); \
-		else if (event == MOD_HDCP_EVENT_CPIRQ) \
-			HDCP_CPIRQ_TRACE(hdcp)
+		do { \
+			if (event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) \
+				HDCP_TIMEOUT_TRACE(hdcp); \
+			else if (event == MOD_HDCP_EVENT_CPIRQ) \
+				HDCP_CPIRQ_TRACE(hdcp);	\
+		} while (0)
 /* TODO: find some way to tell if logging is off to save time */
 #define HDCP_DDC_READ_TRACE(hdcp, msg_name, msg, msg_size) do { \
 		mod_hdcp_dump_binary_message(msg, msg_size, hdcp->buf, \
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
index ee67a35c2a8e..8c137d7c032e 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
@@ -443,7 +443,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_dp_stream_encryption(struct mod_hdcp
 	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
 
 		if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE)
-				continue;
+			continue;
 
 		memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
 
@@ -926,7 +926,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_enable_dp_stream_encryption(struct mod_hdcp
 
 	for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
 		if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE)
-				continue;
+			continue;
 
 		hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.display_handle = hdcp->displays[i].index;
 		hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.session_handle = hdcp->auth.id;
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
index 5b71bc96b98c..7844ea91650b 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
@@ -98,9 +98,9 @@ enum ta_dtm_encoder_type {
  * This enum defines software value for dio_output_type
  */
 typedef enum {
-    TA_DTM_DIO_OUTPUT_TYPE__INVALID,
-    TA_DTM_DIO_OUTPUT_TYPE__DIRECT,
-    TA_DTM_DIO_OUTPUT_TYPE__DPIA
+	TA_DTM_DIO_OUTPUT_TYPE__INVALID,
+	TA_DTM_DIO_OUTPUT_TYPE__DIRECT,
+	TA_DTM_DIO_OUTPUT_TYPE__DPIA
 } ta_dtm_dio_output_type;
 
 struct ta_dtm_topology_update_input_v3 {
@@ -237,11 +237,11 @@ enum ta_hdcp2_hdcp2_msg_id_max_size {
 #define TA_HDCP__HDCP1_KSV_LIST_MAX_ENTRIES 127
 #define TA_HDCP__HDCP1_V_PRIME_SIZE 20
 #define TA_HDCP__HDCP2_TX_BUF_MAX_SIZE                                                                                 \
-	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM + 6
+	(TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM + 6)
 
 // 64 bits boundaries
 #define TA_HDCP__HDCP2_RX_BUF_MAX_SIZE                                                                                 \
-	TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO + 4
+	(TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO + 4)
 
 enum ta_hdcp_status {
 	TA_HDCP_STATUS__SUCCESS = 0x00,
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
index afe1f6cce528..cc3dc9b589f6 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
@@ -23,34 +23,6 @@
  *
  */
 
-
-
-
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
 #ifndef MOD_FREESYNC_H_
 #define MOD_FREESYNC_H_
 
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index 84f9b412a4f1..738ee763f24a 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -147,12 +147,15 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 	}
 
 	/* VSC packet set to 4 for PSR-SU, or 2 for PSR1 */
-	if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
-		vsc_packet_revision = vsc_packet_rev4;
-	else if (stream->link->replay_settings.config.replay_supported)
+	if (stream->link->psr_settings.psr_feature_enabled) {
+		if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
+			vsc_packet_revision = vsc_packet_rev4;
+		else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1)
+			vsc_packet_revision = vsc_packet_rev2;
+	}
+
+	if (stream->link->replay_settings.config.replay_supported)
 		vsc_packet_revision = vsc_packet_rev4;
-	else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1)
-		vsc_packet_revision = vsc_packet_rev2;
 
 	/* Update to revision 5 for extended colorimetry support */
 	if (stream->use_vsc_sdp_for_colorimetry)
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index a522a7c02911..ad98e504c00d 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -31,7 +31,7 @@
 
 #define DIV_ROUNDUP(a, b) (((a)+((b)/2))/(b))
 #define bswap16_based_on_endian(big_endian, value) \
-	(big_endian) ? cpu_to_be16(value) : cpu_to_le16(value)
+	((big_endian) ? cpu_to_be16(value) : cpu_to_le16(value))
 
 /* Possible Min Reduction config from least aggressive to most aggressive
  *  0    1     2     3     4     5     6     7     8     9     10    11   12
@@ -839,6 +839,8 @@ bool is_psr_su_specific_panel(struct dc_link *link)
 				((dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x08) ||
 				(dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x07)))
 				isPSRSUSupported = false;
+			else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03)
+				isPSRSUSupported = false;
 			else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1)
 				isPSRSUSupported = true;
 		}
@@ -971,6 +973,34 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link,
 	return true;
 }
 
+void set_replay_coasting_vtotal(struct dc_link *link,
+	enum replay_coasting_vtotal_type type,
+	uint16_t vtotal)
+{
+	link->replay_settings.coasting_vtotal_table[type] = vtotal;
+}
+
+void calculate_replay_link_off_frame_count(struct dc_link *link,
+	uint16_t vtotal, uint16_t htotal)
+{
+	uint8_t max_link_off_frame_count = 0;
+	uint16_t max_deviation_line = 0,  pixel_deviation_per_line = 0;
+
+	max_deviation_line = link->dpcd_caps.pr_info.max_deviation_line;
+	pixel_deviation_per_line = link->dpcd_caps.pr_info.pixel_deviation_per_line;
+
+	if (htotal != 0 && vtotal != 0)
+		max_link_off_frame_count = htotal * max_deviation_line / (pixel_deviation_per_line * vtotal);
+	else
+		ASSERT(0);
+
+	link->replay_settings.link_off_frame_count_level =
+		max_link_off_frame_count >= PR_LINK_OFF_FRAME_COUNT_BEST ? PR_LINK_OFF_FRAME_COUNT_BEST :
+		max_link_off_frame_count >= PR_LINK_OFF_FRAME_COUNT_GOOD ? PR_LINK_OFF_FRAME_COUNT_GOOD :
+		PR_LINK_OFF_FRAME_COUNT_FAIL;
+
+}
+
 bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_backlight_caps *caps)
 {
 	unsigned int data_points_size;
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
index d9e0d67d67f7..c17bbc6fb38c 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
@@ -54,6 +54,11 @@ bool dmub_init_abm_config(struct resource_pool *res_pool,
 		unsigned int inst);
 
 void init_replay_config(struct dc_link *link, struct replay_config *pr_config);
+void set_replay_coasting_vtotal(struct dc_link *link,
+	enum replay_coasting_vtotal_type type,
+	uint16_t vtotal);
+void calculate_replay_link_off_frame_count(struct dc_link *link,
+	uint16_t vtotal, uint16_t htotal);
 
 bool is_psr_su_specific_panel(struct dc_link *link);
 void mod_power_calc_psr_configs(struct psr_config *psr_config,
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 7f98394338c2..1dc5dd9b7bf7 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -244,7 +244,6 @@ enum DC_FEATURE_MASK {
 	DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default
 	DC_PSR_ALLOW_SMU_OPT = (1 << 7), //0x80, disabled by default
 	DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), //0x100, disabled by default
-	DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4
 };
 
 enum DC_DEBUG_MASK {
@@ -255,8 +254,10 @@ enum DC_DEBUG_MASK {
 	DC_DISABLE_PSR = 0x10,
 	DC_FORCE_SUBVP_MCLK_SWITCH = 0x20,
 	DC_DISABLE_MPO = 0x40,
-	DC_DISABLE_REPLAY = 0x50,
 	DC_ENABLE_DPIA_TRACE = 0x80,
+	DC_ENABLE_DML2 = 0x100,
+	DC_DISABLE_PSR_SU = 0x200,
+	DC_DISABLE_REPLAY = 0x400,
 };
 
 enum amd_dpm_forced_level;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
new file mode 100644
index 000000000000..be519c8edf49
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_REG_STATE_H__
+#define __AMDGPU_REG_STATE_H__
+
+enum amdgpu_reg_state {
+	AMDGPU_REG_STATE_TYPE_INVALID	= 0,
+	AMDGPU_REG_STATE_TYPE_XGMI	= 1,
+	AMDGPU_REG_STATE_TYPE_WAFL	= 2,
+	AMDGPU_REG_STATE_TYPE_PCIE	= 3,
+	AMDGPU_REG_STATE_TYPE_USR	= 4,
+	AMDGPU_REG_STATE_TYPE_USR_1	= 5
+};
+
+enum amdgpu_sysfs_reg_offset {
+	AMDGPU_SYS_REG_STATE_XGMI	= 0x0000,
+	AMDGPU_SYS_REG_STATE_WAFL	= 0x1000,
+	AMDGPU_SYS_REG_STATE_PCIE	= 0x2000,
+	AMDGPU_SYS_REG_STATE_USR	= 0x3000,
+	AMDGPU_SYS_REG_STATE_USR_1	= 0x4000,
+	AMDGPU_SYS_REG_STATE_END	= 0x5000,
+};
+
+struct amdgpu_reg_state_header {
+	uint16_t		structure_size;
+	uint8_t			format_revision;
+	uint8_t			content_revision;
+	uint8_t			state_type;
+	uint8_t			num_instances;
+	uint16_t		pad;
+};
+
+enum amdgpu_reg_inst_state {
+	AMDGPU_INST_S_OK,
+	AMDGPU_INST_S_EDISABLED,
+	AMDGPU_INST_S_EACCESS,
+};
+
+struct amdgpu_smn_reg_data {
+	uint64_t addr;
+	uint32_t value;
+	uint32_t pad;
+};
+
+struct amdgpu_reg_inst_header {
+	uint16_t	instance;
+	uint16_t	state;
+	uint16_t	num_smn_regs;
+	uint16_t	pad;
+};
+
+
+struct amdgpu_regs_xgmi_v1_0 {
+	struct amdgpu_reg_inst_header	inst_header;
+
+	struct amdgpu_smn_reg_data	smn_reg_values[];
+};
+
+struct amdgpu_reg_state_xgmi_v1_0 {
+	/* common_header.state_type must be AMDGPU_REG_STATE_TYPE_XGMI */
+	struct amdgpu_reg_state_header	common_header;
+
+	struct amdgpu_regs_xgmi_v1_0	xgmi_state_regs[];
+};
+
+struct amdgpu_regs_wafl_v1_0 {
+	struct amdgpu_reg_inst_header	inst_header;
+
+	struct amdgpu_smn_reg_data	smn_reg_values[];
+};
+
+struct amdgpu_reg_state_wafl_v1_0 {
+	/* common_header.state_type must be AMDGPU_REG_STATE_TYPE_WAFL */
+	struct amdgpu_reg_state_header	common_header;
+
+	struct amdgpu_regs_wafl_v1_0	wafl_state_regs[];
+};
+
+struct amdgpu_regs_pcie_v1_0 {
+	struct amdgpu_reg_inst_header	inst_header;
+
+	uint16_t			device_status;
+	uint16_t			link_status;
+	uint32_t			sub_bus_number_latency;
+	uint32_t			pcie_corr_err_status;
+	uint32_t			pcie_uncorr_err_status;
+
+	struct amdgpu_smn_reg_data	smn_reg_values[];
+};
+
+struct amdgpu_reg_state_pcie_v1_0 {
+	/* common_header.state_type must be AMDGPU_REG_STATE_TYPE_PCIE */
+	struct amdgpu_reg_state_header	common_header;
+
+	struct amdgpu_regs_pcie_v1_0	pci_state_regs[];
+};
+
+struct amdgpu_regs_usr_v1_0 {
+	struct amdgpu_reg_inst_header	inst_header;
+
+	struct amdgpu_smn_reg_data	smn_reg_values[];
+};
+
+struct amdgpu_reg_state_usr_v1_0 {
+	/* common_header.state_type must be AMDGPU_REG_STATE_TYPE_USR */
+	struct amdgpu_reg_state_header	common_header;
+
+	struct amdgpu_regs_usr_v1_0	usr_state_regs[];
+};
+
+static inline size_t amdgpu_reginst_size(uint16_t num_inst, size_t inst_size,
+					 uint16_t num_regs)
+{
+	return num_inst *
+	       (inst_size + num_regs * sizeof(struct amdgpu_smn_reg_data));
+}
+
+#define amdgpu_asic_get_reg_state_supported(adev) \
+	((adev)->asic_funcs->get_reg_state ? 1 : 0)
+
+#define amdgpu_asic_get_reg_state(adev, state, buf, size)                  \
+	((adev)->asic_funcs->get_reg_state ?                               \
+		 (adev)->asic_funcs->get_reg_state((adev), (state), (buf), \
+						   (size)) :               \
+		 0)
+
+
+int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h
index b64664879211..fca72e2ec929 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h
@@ -6220,12 +6220,20 @@
 #define DCCG_GATE_DISABLE_CNTL4__PHYD_REFCLK_ROOT_GATE_DISABLE__SHIFT                                         0x3
 #define DCCG_GATE_DISABLE_CNTL4__PHYE_REFCLK_ROOT_GATE_DISABLE__SHIFT                                         0x4
 #define DCCG_GATE_DISABLE_CNTL4__HDMICHARCLK0_ROOT_GATE_DISABLE__SHIFT                                        0x11
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK0_GATE_DISABLE__SHIFT                                              0x17
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK1_GATE_DISABLE__SHIFT                                              0x18
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK2_GATE_DISABLE__SHIFT                                              0x19
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK3_GATE_DISABLE__SHIFT                                              0x1a
 #define DCCG_GATE_DISABLE_CNTL4__PHYA_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000001L
 #define DCCG_GATE_DISABLE_CNTL4__PHYB_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000002L
 #define DCCG_GATE_DISABLE_CNTL4__PHYC_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000004L
 #define DCCG_GATE_DISABLE_CNTL4__PHYD_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000008L
 #define DCCG_GATE_DISABLE_CNTL4__PHYE_REFCLK_ROOT_GATE_DISABLE_MASK                                           0x00000010L
 #define DCCG_GATE_DISABLE_CNTL4__HDMICHARCLK0_ROOT_GATE_DISABLE_MASK                                          0x00020000L
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK0_GATE_DISABLE_MASK                                                0x00800000L
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK1_GATE_DISABLE_MASK                                                0x01000000L
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK2_GATE_DISABLE_MASK                                                0x02000000L
+#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK3_GATE_DISABLE_MASK                                                0x04000000L
 #define DPSTREAMCLK_CNTL__DPSTREAMCLK0_SRC_SEL__SHIFT                                                         0x0
 #define DPSTREAMCLK_CNTL__DPSTREAMCLK0_EN__SHIFT                                                              0x3
 #define DPSTREAMCLK_CNTL__DPSTREAMCLK1_SRC_SEL__SHIFT                                                         0x4
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h
index c92c4b83253f..4bff1ef8a9a6 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h
@@ -6369,6 +6369,8 @@
 #define regTCP_INVALIDATE_BASE_IDX                                                                      1
 #define regTCP_STATUS                                                                                   0x19a1
 #define regTCP_STATUS_BASE_IDX                                                                          1
+#define regTCP_CNTL                                                                                     0x19a2
+#define regTCP_CNTL_BASE_IDX                                                                            1
 #define regTCP_CNTL2                                                                                    0x19a3
 #define regTCP_CNTL2_BASE_IDX                                                                           1
 #define regTCP_DEBUG_INDEX                                                                              0x19a5
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
index ff30f04be591..7ee3d291120d 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
@@ -781,6 +781,8 @@
 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2_BASE_IDX                                              5
 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1                                             0x420187
 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1_BASE_IDX                                    5
+#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3                                                  0x4201c6
+#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3_BASE_IDX                                         5
 
 
 // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h
index 7f131999a263..eb8c556d9c93 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h
@@ -24646,6 +24646,35 @@
 //BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1
 #define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK                                  0x00000001L
 #define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK                               0x00000008L
+//BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_PAYLOAD_SIZE_MODE__SHIFT                     0x8
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_PRIV_MAX_PAYLOAD_SIZE__SHIFT                     0x9
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_10BIT_TAG_EN_OVERRIDE__SHIFT                          0xb
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_10BIT_TAG_EN_OVERRIDE__SHIFT                     0xd
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__MST_DROP_SYNC_FLOOD_EN__SHIFT                            0xf
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_PAYLOAD_SIZE_MODE__SHIFT                          0x10
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_PAYLOAD_SIZE__SHIFT                          0x11
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_REQUEST_SIZE_MODE__SHIFT                     0x14
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_READ_REQUEST_SIZE__SHIFT                     0x15
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_SAFE_MODE__SHIFT                             0x18
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_EXTENDED_TAG_EN_OVERRIDE__SHIFT                       0x19
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE__SHIFT                0x1b
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV__SHIFT                0x1c
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_EXTENDED_TAG_EN_OVERRIDE__SHIFT                  0x1e
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_PAYLOAD_SIZE_MODE_MASK                       0x00000100L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_PRIV_MAX_PAYLOAD_SIZE_MASK                       0x00000600L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_10BIT_TAG_EN_OVERRIDE_MASK                            0x00001800L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_10BIT_TAG_EN_OVERRIDE_MASK                       0x00006000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__MST_DROP_SYNC_FLOOD_EN_MASK                              0x00008000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_PAYLOAD_SIZE_MODE_MASK                            0x00010000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_PAYLOAD_SIZE_MASK                            0x000E0000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_REQUEST_SIZE_MODE_MASK                       0x00100000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_READ_REQUEST_SIZE_MASK                       0x00E00000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_SAFE_MODE_MASK                               0x01000000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_EXTENDED_TAG_EN_OVERRIDE_MASK                         0x06000000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE_MASK                  0x08000000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV_MASK                  0x30000000L
+#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_EXTENDED_TAG_EN_OVERRIDE_MASK                    0xC0000000L
 
 // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp
 //BIF_CFG_DEV0_RC0_VENDOR_ID
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h
new file mode 100644
index 000000000000..a4dd372c0541
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2023  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _smuio_10_0_2_OFFSET_HEADER
+
+// addressBlock: smuio_smuio_misc_SmuSmuioDec
+// base address: 0x5a000
+#define mmSMUIO_MCM_CONFIG                                                                             0x0023
+#define mmSMUIO_MCM_CONFIG_BASE_IDX                                                                    0
+#define mmIP_DISCOVERY_VERSION                                                                         0x0000
+#define mmIP_DISCOVERY_VERSION_BASE_IDX                                                                1
+#define mmIO_SMUIO_PINSTRAP                                                                            0x01b1
+#define mmIO_SMUIO_PINSTRAP_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER0                                                                            0x01b2
+#define mmSCRATCH_REGISTER0_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER1                                                                            0x01b3
+#define mmSCRATCH_REGISTER1_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER2                                                                            0x01b4
+#define mmSCRATCH_REGISTER2_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER3                                                                            0x01b5
+#define mmSCRATCH_REGISTER3_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER4                                                                            0x01b6
+#define mmSCRATCH_REGISTER4_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER5                                                                            0x01b7
+#define mmSCRATCH_REGISTER5_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER6                                                                            0x01b8
+#define mmSCRATCH_REGISTER6_BASE_IDX                                                                   1
+#define mmSCRATCH_REGISTER7                                                                            0x01b9
+#define mmSCRATCH_REGISTER7_BASE_IDX                                                                   1
+
+
+// addressBlock: smuio_smuio_reset_SmuSmuioDec
+// base address: 0x5a300
+#define mmSMUIO_MP_RESET_INTR                                                                          0x00c1
+#define mmSMUIO_MP_RESET_INTR_BASE_IDX                                                                 0
+#define mmSMUIO_SOC_HALT                                                                               0x00c2
+#define mmSMUIO_SOC_HALT_BASE_IDX                                                                      0
+#define mmSMUIO_GFX_MISC_CNTL                                                                          0x00c8
+#define mmSMUIO_GFX_MISC_CNTL_BASE_IDX                                                                 0
+
+
+// addressBlock: smuio_smuio_ccxctrl_SmuSmuioDec
+// base address: 0x5a000
+#define mmPWROK_REFCLK_GAP_CYCLES                                                                      0x0001
+#define mmPWROK_REFCLK_GAP_CYCLES_BASE_IDX                                                             1
+#define mmGOLDEN_TSC_INCREMENT_UPPER                                                                   0x0004
+#define mmGOLDEN_TSC_INCREMENT_UPPER_BASE_IDX                                                          1
+#define mmGOLDEN_TSC_INCREMENT_LOWER                                                                   0x0005
+#define mmGOLDEN_TSC_INCREMENT_LOWER_BASE_IDX                                                          1
+#define mmGOLDEN_TSC_COUNT_UPPER                                                                       0x0025
+#define mmGOLDEN_TSC_COUNT_UPPER_BASE_IDX                                                              1
+#define mmGOLDEN_TSC_COUNT_LOWER                                                                       0x0026
+#define mmGOLDEN_TSC_COUNT_LOWER_BASE_IDX                                                              1
+#define mmGFX_GOLDEN_TSC_SHADOW_UPPER                                                                  0x0029
+#define mmGFX_GOLDEN_TSC_SHADOW_UPPER_BASE_IDX                                                         1
+#define mmGFX_GOLDEN_TSC_SHADOW_LOWER                                                                  0x002a
+#define mmGFX_GOLDEN_TSC_SHADOW_LOWER_BASE_IDX                                                         1
+#define mmSOC_GOLDEN_TSC_SHADOW_UPPER                                                                  0x002b
+#define mmSOC_GOLDEN_TSC_SHADOW_UPPER_BASE_IDX                                                         1
+#define mmSOC_GOLDEN_TSC_SHADOW_LOWER                                                                  0x002c
+#define mmSOC_GOLDEN_TSC_SHADOW_LOWER_BASE_IDX                                                         1
+#define mmSOC_GAP_PWROK                                                                                0x002d
+#define mmSOC_GAP_PWROK_BASE_IDX                                                                       1
+
+// addressBlock: smuio_smuio_swtimer_SmuSmuioDec
+// base address: 0x5ac40
+#define mmPWR_VIRT_RESET_REQ                                                                           0x0110
+#define mmPWR_VIRT_RESET_REQ_BASE_IDX                                                                  1
+#define mmPWR_DISP_TIMER_CONTROL                                                                       0x0111
+#define mmPWR_DISP_TIMER_CONTROL_BASE_IDX                                                              1
+#define mmPWR_DISP_TIMER2_CONTROL                                                                      0x0113
+#define mmPWR_DISP_TIMER2_CONTROL_BASE_IDX                                                             1
+#define mmPWR_DISP_TIMER_GLOBAL_CONTROL                                                                0x0115
+#define mmPWR_DISP_TIMER_GLOBAL_CONTROL_BASE_IDX                                                       1
+#define mmPWR_IH_CONTROL                                                                               0x0116
+#define mmPWR_IH_CONTROL_BASE_IDX                                                                      1
+
+// addressBlock: smuio_smuio_svi0_SmuSmuioDec
+// base address: 0x6f000
+#define mmSMUSVI0_TEL_PLANE0                                                                           0x520e
+#define mmSMUSVI0_TEL_PLANE0_BASE_IDX                                                                  1
+#define mmSMUSVI0_PLANE0_CURRENTVID                                                                    0x5217
+#define mmSMUSVI0_PLANE0_CURRENTVID_BASE_IDX                                                           1
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h
new file mode 100644
index 000000000000..d10ae61c346b
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2023  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _smuio_10_0_2_SH_MASK_HEADER
+
+// addressBlock: smuio_smuio_misc_SmuSmuioDec
+//SMUIO_MCM_CONFIG
+#define SMUIO_MCM_CONFIG__DIE_ID__SHIFT                                                                       0x0
+#define SMUIO_MCM_CONFIG__PKG_TYPE__SHIFT                                                                     0x2
+#define SMUIO_MCM_CONFIG__SOCKET_ID__SHIFT                                                                    0x5
+#define SMUIO_MCM_CONFIG__PKG_SUBTYPE__SHIFT                                                                  0x6
+#define SMUIO_MCM_CONFIG__CONSOLE_K__SHIFT                                                                    0x10
+#define SMUIO_MCM_CONFIG__CONSOLE_A__SHIFT                                                                    0x11
+#define SMUIO_MCM_CONFIG__DIE_ID_MASK                                                                         0x00000003L
+#define SMUIO_MCM_CONFIG__PKG_TYPE_MASK                                                                       0x0000001CL
+#define SMUIO_MCM_CONFIG__SOCKET_ID_MASK                                                                      0x00000020L
+#define SMUIO_MCM_CONFIG__PKG_SUBTYPE_MASK                                                                    0x000000C0L
+#define SMUIO_MCM_CONFIG__CONSOLE_K_MASK                                                                      0x00010000L
+#define SMUIO_MCM_CONFIG__CONSOLE_A_MASK                                                                      0x00020000L
+//IP_DISCOVERY_VERSION
+#define IP_DISCOVERY_VERSION__IP_DISCOVERY_VERSION__SHIFT                                                     0x0
+#define IP_DISCOVERY_VERSION__IP_DISCOVERY_VERSION_MASK                                                       0xFFFFFFFFL
+//IO_SMUIO_PINSTRAP
+#define IO_SMUIO_PINSTRAP__AUD_PORT_CONN__SHIFT                                                               0x0
+#define IO_SMUIO_PINSTRAP__AUD__SHIFT                                                                         0x3
+#define IO_SMUIO_PINSTRAP__AUD_PORT_CONN_MASK                                                                 0x00000007L
+#define IO_SMUIO_PINSTRAP__AUD_MASK                                                                           0x00000018L
+//SCRATCH_REGISTER0
+#define SCRATCH_REGISTER0__ScratchPad0__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER0__ScratchPad0_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER1
+#define SCRATCH_REGISTER1__ScratchPad1__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER1__ScratchPad1_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER2
+#define SCRATCH_REGISTER2__ScratchPad2__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER2__ScratchPad2_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER3
+#define SCRATCH_REGISTER3__ScratchPad3__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER3__ScratchPad3_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER4
+#define SCRATCH_REGISTER4__ScratchPad4__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER4__ScratchPad4_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER5
+#define SCRATCH_REGISTER5__ScratchPad5__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER5__ScratchPad5_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER6
+#define SCRATCH_REGISTER6__ScratchPad6__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER6__ScratchPad6_MASK                                                                   0xFFFFFFFFL
+//SCRATCH_REGISTER7
+#define SCRATCH_REGISTER7__ScratchPad7__SHIFT                                                                 0x0
+#define SCRATCH_REGISTER7__ScratchPad7_MASK                                                                   0xFFFFFFFFL
+
+// addressBlock: smuio_smuio_reset_SmuSmuioDec
+//SMUIO_MP_RESET_INTR
+#define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR__SHIFT                                                       0x0
+#define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR_MASK                                                         0x00000001L
+//SMUIO_SOC_HALT
+#define SMUIO_SOC_HALT__WDT_FORCE_PWROK_EN__SHIFT                                                             0x2
+#define SMUIO_SOC_HALT__WDT_FORCE_RESETn_EN__SHIFT                                                            0x3
+#define SMUIO_SOC_HALT__WDT_FORCE_PWROK_EN_MASK                                                               0x00000004L
+#define SMUIO_SOC_HALT__WDT_FORCE_RESETn_EN_MASK                                                              0x00000008L
+//SMUIO_GFX_MISC_CNTL
+#define SMUIO_GFX_MISC_CNTL__SMU_GFX_cold_vs_gfxoff__SHIFT                                                    0x0
+#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT                                                         0x1
+#define SMUIO_GFX_MISC_CNTL__PWR_GFX_DLDO_CLK_SWITCH__SHIFT                                                   0x3
+#define SMUIO_GFX_MISC_CNTL__PWR_GFX_RLC_CGPG_EN__SHIFT                                                       0x4
+#define SMUIO_GFX_MISC_CNTL__SMU_GFX_cold_vs_gfxoff_MASK                                                      0x00000001L
+#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK                                                           0x00000006L
+#define SMUIO_GFX_MISC_CNTL__PWR_GFX_DLDO_CLK_SWITCH_MASK                                                     0x00000008L
+#define SMUIO_GFX_MISC_CNTL__PWR_GFX_RLC_CGPG_EN_MASK                                                         0x00000010L
+
+// addressBlock: smuio_smuio_ccxctrl_SmuSmuioDec
+//PWROK_REFCLK_GAP_CYCLES
+#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PreAssertion_clkgap_cycles__SHIFT                                      0x0
+#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PostAssertion_clkgap_cycles__SHIFT                                     0x8
+#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PreAssertion_clkgap_cycles_MASK                                        0x000000FFL
+#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PostAssertion_clkgap_cycles_MASK                                       0x0000FF00L
+//GOLDEN_TSC_INCREMENT_UPPER
+#define GOLDEN_TSC_INCREMENT_UPPER__GoldenTscIncrementUpper__SHIFT                                            0x0
+#define GOLDEN_TSC_INCREMENT_UPPER__GoldenTscIncrementUpper_MASK                                              0x00FFFFFFL
+//GOLDEN_TSC_INCREMENT_LOWER
+#define GOLDEN_TSC_INCREMENT_LOWER__GoldenTscIncrementLower__SHIFT                                            0x0
+#define GOLDEN_TSC_INCREMENT_LOWER__GoldenTscIncrementLower_MASK                                              0xFFFFFFFFL
+//GOLDEN_TSC_COUNT_UPPER
+#define GOLDEN_TSC_COUNT_UPPER__GoldenTscCountUpper__SHIFT                                                    0x0
+#define GOLDEN_TSC_COUNT_UPPER__GoldenTscCountUpper_MASK                                                      0x00FFFFFFL
+//GOLDEN_TSC_COUNT_LOWER
+#define GOLDEN_TSC_COUNT_LOWER__GoldenTscCountLower__SHIFT                                                    0x0
+#define GOLDEN_TSC_COUNT_LOWER__GoldenTscCountLower_MASK                                                      0xFFFFFFFFL
+//GFX_GOLDEN_TSC_SHADOW_UPPER
+#define GFX_GOLDEN_TSC_SHADOW_UPPER__GfxGoldenTscShadowUpper__SHIFT                                           0x0
+#define GFX_GOLDEN_TSC_SHADOW_UPPER__GfxGoldenTscShadowUpper_MASK                                             0x00FFFFFFL
+//GFX_GOLDEN_TSC_SHADOW_LOWER
+#define GFX_GOLDEN_TSC_SHADOW_LOWER__GfxGoldenTscShadowLower__SHIFT                                           0x0
+#define GFX_GOLDEN_TSC_SHADOW_LOWER__GfxGoldenTscShadowLower_MASK                                             0xFFFFFFFFL
+//SOC_GOLDEN_TSC_SHADOW_UPPER
+#define SOC_GOLDEN_TSC_SHADOW_UPPER__SocGoldenTscShadowUpper__SHIFT                                           0x0
+#define SOC_GOLDEN_TSC_SHADOW_UPPER__SocGoldenTscShadowUpper_MASK                                             0x00FFFFFFL
+//SOC_GOLDEN_TSC_SHADOW_LOWER
+#define SOC_GOLDEN_TSC_SHADOW_LOWER__SocGoldenTscShadowLower__SHIFT                                           0x0
+#define SOC_GOLDEN_TSC_SHADOW_LOWER__SocGoldenTscShadowLower_MASK                                             0xFFFFFFFFL
+//SOC_GAP_PWROK
+#define SOC_GAP_PWROK__soc_gap_pwrok__SHIFT                                                                   0x0
+#define SOC_GAP_PWROK__soc_gap_pwrok_MASK                                                                     0x00000001L
+
+// addressBlock: smuio_smuio_swtimer_SmuSmuioDec
+//PWR_VIRT_RESET_REQ
+#define PWR_VIRT_RESET_REQ__VF_FLR__SHIFT                                                                     0x0
+#define PWR_VIRT_RESET_REQ__PF_FLR__SHIFT                                                                     0x1f
+#define PWR_VIRT_RESET_REQ__VF_FLR_MASK                                                                       0x7FFFFFFFL
+#define PWR_VIRT_RESET_REQ__PF_FLR_MASK                                                                       0x80000000L
+//PWR_DISP_TIMER_CONTROL
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_COUNT__SHIFT                                                   0x0
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_ENABLE__SHIFT                                                  0x19
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_DISABLE__SHIFT                                                 0x1a
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MASK__SHIFT                                                    0x1b
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_STAT_AK__SHIFT                                                 0x1c
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_TYPE__SHIFT                                                    0x1d
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MODE__SHIFT                                                    0x1e
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_COUNT_MASK                                                     0x01FFFFFFL
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_ENABLE_MASK                                                    0x02000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_DISABLE_MASK                                                   0x04000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MASK_MASK                                                      0x08000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_STAT_AK_MASK                                                   0x10000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_TYPE_MASK                                                      0x20000000L
+#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MODE_MASK                                                      0x40000000L
+//PWR_DISP_TIMER2_CONTROL
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_COUNT__SHIFT                                                  0x0
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_ENABLE__SHIFT                                                 0x19
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_DISABLE__SHIFT                                                0x1a
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MASK__SHIFT                                                   0x1b
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_STAT_AK__SHIFT                                                0x1c
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_TYPE__SHIFT                                                   0x1d
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MODE__SHIFT                                                   0x1e
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_COUNT_MASK                                                    0x01FFFFFFL
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_ENABLE_MASK                                                   0x02000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_DISABLE_MASK                                                  0x04000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MASK_MASK                                                     0x08000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_STAT_AK_MASK                                                  0x10000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_TYPE_MASK                                                     0x20000000L
+#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MODE_MASK                                                     0x40000000L
+//PWR_DISP_TIMER_GLOBAL_CONTROL
+#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_WIDTH__SHIFT                                          0x0
+#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_EN__SHIFT                                             0xa
+#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_WIDTH_MASK                                            0x000003FFL
+#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_EN_MASK                                               0x00000400L
+//PWR_IH_CONTROL
+#define PWR_IH_CONTROL__MAX_CREDIT__SHIFT                                                                     0x0
+#define PWR_IH_CONTROL__DISP_TIMER_TRIGGER_MASK__SHIFT                                                        0x5
+#define PWR_IH_CONTROL__DISP_TIMER2_TRIGGER_MASK__SHIFT                                                       0x6
+#define PWR_IH_CONTROL__PWR_IH_CLK_GATE_EN__SHIFT                                                             0x1f
+#define PWR_IH_CONTROL__MAX_CREDIT_MASK                                                                       0x0000001FL
+#define PWR_IH_CONTROL__DISP_TIMER_TRIGGER_MASK_MASK                                                          0x00000020L
+#define PWR_IH_CONTROL__DISP_TIMER2_TRIGGER_MASK_MASK                                                         0x00000040L
+#define PWR_IH_CONTROL__PWR_IH_CLK_GATE_EN_MASK                                                               0x80000000L
+
+// addressBlock: smuio_smuio_svi0_SmuSmuioDec
+//SMUSVI0_TEL_PLANE0
+#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_IDDCOR__SHIFT                                                         0x0
+#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_VDDCOR__SHIFT                                                         0x10
+#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_IDDCOR_MASK                                                           0x000000FFL
+#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_VDDCOR_MASK                                                           0x01FF0000L
+//SMUSVI0_PLANE0_CURRENTVID
+#define SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID__SHIFT                                             0x18
+#define SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID_MASK                                               0xFF000000L
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index cd3c40a86029..edcb85560ced 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -318,6 +318,7 @@ enum pp_xgmi_plpd_mode {
 #define MAX_GFX_CLKS 8
 #define MAX_CLKS 4
 #define NUM_VCN 4
+#define NUM_JPEG_ENG 32
 
 struct seq_file;
 enum amd_pp_clock_type;
@@ -421,7 +422,7 @@ struct amd_pm_funcs {
 	int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock);
 	int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
 	int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
-	int (*get_asic_baco_capability)(void *handle, bool *cap);
+	bool (*get_asic_baco_capability)(void *handle);
 	int (*get_asic_baco_state)(void *handle, int *state);
 	int (*set_asic_baco_state)(void *handle, int state);
 	int (*get_ppfeature_status)(void *handle, char *buf);
@@ -431,6 +432,7 @@ struct amd_pm_funcs {
 	int (*set_df_cstate)(void *handle, enum pp_df_cstate state);
 	int (*set_xgmi_pstate)(void *handle, uint32_t pstate);
 	ssize_t (*get_gpu_metrics)(void *handle, void **table);
+	ssize_t (*get_pm_metrics)(void *handle, void *pmmetrics, size_t size);
 	int (*set_watermarks_for_clock_ranges)(void *handle,
 					       struct pp_smu_wm_range_sets *ranges);
 	int (*display_disable_memory_clock_switch)(void *handle,
@@ -444,6 +446,7 @@ struct amd_pm_funcs {
 				   struct dpm_clocks *clock_table);
 	int (*get_smu_prv_buf_details)(void *handle, void **addr, size_t *size);
 	void (*pm_compute_clocks)(void *handle);
+	int (*notify_rlc_state)(void *handle, bool en);
 };
 
 struct metrics_table_header {
@@ -773,6 +776,85 @@ struct gpu_metrics_v1_4 {
 	uint16_t			padding;
 };
 
+struct gpu_metrics_v1_5 {
+	struct metrics_table_header	common_header;
+
+	/* Temperature (Celsius) */
+	uint16_t			temperature_hotspot;
+	uint16_t			temperature_mem;
+	uint16_t			temperature_vrsoc;
+
+	/* Power (Watts) */
+	uint16_t			curr_socket_power;
+
+	/* Utilization (%) */
+	uint16_t			average_gfx_activity;
+	uint16_t			average_umc_activity; // memory controller
+	uint16_t			vcn_activity[NUM_VCN];
+	uint16_t			jpeg_activity[NUM_JPEG_ENG];
+
+	/* Energy (15.259uJ (2^-16) units) */
+	uint64_t			energy_accumulator;
+
+	/* Driver attached timestamp (in ns) */
+	uint64_t			system_clock_counter;
+
+	/* Throttle status */
+	uint32_t			throttle_status;
+
+	/* Clock Lock Status. Each bit corresponds to clock instance */
+	uint32_t			gfxclk_lock_status;
+
+	/* Link width (number of lanes) and speed (in 0.1 GT/s) */
+	uint16_t			pcie_link_width;
+	uint16_t			pcie_link_speed;
+
+	/* XGMI bus width and bitrate (in Gbps) */
+	uint16_t			xgmi_link_width;
+	uint16_t			xgmi_link_speed;
+
+	/* Utilization Accumulated (%) */
+	uint32_t			gfx_activity_acc;
+	uint32_t			mem_activity_acc;
+
+	/*PCIE accumulated bandwidth (GB/sec) */
+	uint64_t			pcie_bandwidth_acc;
+
+	/*PCIE instantaneous bandwidth (GB/sec) */
+	uint64_t			pcie_bandwidth_inst;
+
+	/* PCIE L0 to recovery state transition accumulated count */
+	uint64_t			pcie_l0_to_recov_count_acc;
+
+	/* PCIE replay accumulated count */
+	uint64_t			pcie_replay_count_acc;
+
+	/* PCIE replay rollover accumulated count */
+	uint64_t			pcie_replay_rover_count_acc;
+
+	/* PCIE NAK sent  accumulated count */
+	uint32_t			pcie_nak_sent_count_acc;
+
+	/* PCIE NAK received accumulated count */
+	uint32_t			pcie_nak_rcvd_count_acc;
+
+	/* XGMI accumulated data transfer size(KiloBytes) */
+	uint64_t			xgmi_read_data_acc[NUM_XGMI_LINKS];
+	uint64_t			xgmi_write_data_acc[NUM_XGMI_LINKS];
+
+	/* PMFW attached timestamp (10ns resolution) */
+	uint64_t			firmware_timestamp;
+
+	/* Current clocks (Mhz) */
+	uint16_t			current_gfxclk[MAX_GFX_CLKS];
+	uint16_t			current_socclk[MAX_CLKS];
+	uint16_t			current_vclk0[MAX_CLKS];
+	uint16_t			current_dclk0[MAX_CLKS];
+	uint16_t			current_uclk;
+
+	uint16_t			padding;
+};
+
 /*
  * gpu_metrics_v2_0 is not recommended as it's not naturally aligned.
  * Use gpu_metrics_v2_1 or later instead.
@@ -1084,6 +1166,10 @@ struct gpu_metrics_v3_0 {
 	uint16_t			average_dram_reads;
 	/* time filtered DRAM write bandwidth [MB/sec] */
 	uint16_t			average_dram_writes;
+	/* time filtered IPU read bandwidth [MB/sec] */
+	uint16_t			average_ipu_reads;
+	/* time filtered IPU write bandwidth [MB/sec] */
+	uint16_t			average_ipu_writes;
 
 	/* Driver attached timestamp (in ns) */
 	uint64_t			system_clock_counter;
@@ -1103,6 +1189,8 @@ struct gpu_metrics_v3_0 {
 	uint32_t			average_all_core_power;
 	/* calculated core power [mW] */
 	uint16_t			average_core_power[16];
+	/* time filtered total system power [mW] */
+	uint16_t			average_sys_power;
 	/* maximum IRM defined STAPM power limit [mW] */
 	uint16_t			stapm_power_limit;
 	/* time filtered STAPM power limit [mW] */
@@ -1115,6 +1203,8 @@ struct gpu_metrics_v3_0 {
 	uint16_t			average_ipuclk_frequency;
 	uint16_t			average_fclk_frequency;
 	uint16_t			average_vclk_frequency;
+	uint16_t			average_uclk_frequency;
+	uint16_t			average_mpipu_frequency;
 
 	/* Current clocks */
 	/* target core frequency [MHz] */
@@ -1124,7 +1214,31 @@ struct gpu_metrics_v3_0 {
 	/* GFXCLK frequency limit enforced on GFX [MHz] */
 	uint16_t			current_gfx_maxfreq;
 
+	/* Throttle Residency (ASIC dependent) */
+	uint32_t			throttle_residency_prochot;
+	uint32_t			throttle_residency_spl;
+	uint32_t			throttle_residency_fppt;
+	uint32_t			throttle_residency_sppt;
+	uint32_t			throttle_residency_thm_core;
+	uint32_t			throttle_residency_thm_gfx;
+	uint32_t			throttle_residency_thm_soc;
+
 	/* Metrics table alpha filter time constant [us] */
 	uint32_t			time_filter_alphavalue;
 };
+
+struct amdgpu_pmmetrics_header {
+	uint16_t structure_size;
+	uint16_t pad;
+	uint32_t mp1_ip_discovery_version;
+	uint32_t pmfw_version;
+	uint32_t pmmetrics_version;
+};
+
+struct amdgpu_pm_metrics {
+	struct amdgpu_pmmetrics_header common_header;
+
+	uint8_t data[];
+};
+
 #endif
diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
index b1db2b190187..ec5b9ab67c5e 100644
--- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
@@ -232,6 +232,7 @@ union MESAPI_SET_HW_RESOURCES {
 		};
 		uint32_t	oversubscription_timer;
 		uint64_t        doorbell_info;
+		uint64_t        event_intr_history_gpu_mc_ptr;
 	};
 
 	uint32_t	max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -571,7 +572,8 @@ struct SET_SHADER_DEBUGGER {
 		struct {
 			uint32_t single_memop : 1;  /* SQ_DEBUG.single_memop */
 			uint32_t single_alu_op : 1; /* SQ_DEBUG.single_alu_op */
-			uint32_t reserved : 30;
+			uint32_t reserved : 29;
+			uint32_t process_ctx_flush : 1;
 		};
 		uint32_t u32all;
 	} flags;
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 08cb79401410..6627ee07d52d 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -181,12 +181,29 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
 	return ret;
 }
 
+int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en)
+{
+	int ret = 0;
+	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+
+	if (pp_funcs && pp_funcs->notify_rlc_state) {
+		mutex_lock(&adev->pm.mutex);
+
+		ret = pp_funcs->notify_rlc_state(
+				adev->powerplay.pp_handle,
+				en);
+
+		mutex_unlock(&adev->pm.mutex);
+	}
+
+	return ret;
+}
+
 bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
 {
 	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
 	void *pp_handle = adev->powerplay.pp_handle;
-	bool baco_cap;
-	int ret = 0;
+	bool ret;
 
 	if (!pp_funcs || !pp_funcs->get_asic_baco_capability)
 		return false;
@@ -204,12 +221,11 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
 
 	mutex_lock(&adev->pm.mutex);
 
-	ret = pp_funcs->get_asic_baco_capability(pp_handle,
-						 &baco_cap);
+	ret = pp_funcs->get_asic_baco_capability(pp_handle);
 
 	mutex_unlock(&adev->pm.mutex);
 
-	return ret ? false : baco_cap;
+	return ret;
 }
 
 int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev)
@@ -600,6 +616,16 @@ void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable)
 			  enable ? "enable" : "disable", ret);
 }
 
+void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable)
+{
+	int ret = 0;
+
+	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VPE, !enable);
+	if (ret)
+		DRM_ERROR("Dpm %s vpe failed, ret = %d.\n",
+			  enable ? "enable" : "disable", ret);
+}
+
 int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version)
 {
 	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
@@ -1301,6 +1327,23 @@ int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table)
 	return ret;
 }
 
+ssize_t amdgpu_dpm_get_pm_metrics(struct amdgpu_device *adev, void *pm_metrics,
+				  size_t size)
+{
+	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+	int ret = 0;
+
+	if (!pp_funcs->get_pm_metrics)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&adev->pm.mutex);
+	ret = pp_funcs->get_pm_metrics(adev->powerplay.pp_handle, pm_metrics,
+				       size);
+	mutex_unlock(&adev->pm.mutex);
+
+	return ret;
+}
+
 int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev,
 				    uint32_t *fan_mode)
 {
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index ca2ece24e1e0..f3cb490fe79b 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1799,6 +1799,44 @@ static ssize_t amdgpu_set_apu_thermal_cap(struct device *dev,
 	return count;
 }
 
+static int amdgpu_pm_metrics_attr_update(struct amdgpu_device *adev,
+					 struct amdgpu_device_attr *attr,
+					 uint32_t mask,
+					 enum amdgpu_device_attr_states *states)
+{
+	if (amdgpu_dpm_get_pm_metrics(adev, NULL, 0) == -EOPNOTSUPP)
+		*states = ATTR_STATE_UNSUPPORTED;
+
+	return 0;
+}
+
+static ssize_t amdgpu_get_pm_metrics(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	ssize_t size = 0;
+	int ret;
+
+	if (amdgpu_in_reset(adev))
+		return -EPERM;
+	if (adev->in_suspend && !adev->in_runpm)
+		return -EPERM;
+
+	ret = pm_runtime_get_sync(ddev->dev);
+	if (ret < 0) {
+		pm_runtime_put_autosuspend(ddev->dev);
+		return ret;
+	}
+
+	size = amdgpu_dpm_get_pm_metrics(adev, buf, PAGE_SIZE);
+
+	pm_runtime_mark_last_busy(ddev->dev);
+	pm_runtime_put_autosuspend(ddev->dev);
+
+	return size;
+}
+
 /**
  * DOC: gpu_metrics
  *
@@ -2096,6 +2134,8 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
 	AMDGPU_DEVICE_ATTR_RW(smartshift_bias,				ATTR_FLAG_BASIC,
 			      .attr_update = ss_bias_attr_update),
 	AMDGPU_DEVICE_ATTR_RW(xgmi_plpd_policy,				ATTR_FLAG_BASIC),
+	AMDGPU_DEVICE_ATTR_RO(pm_metrics,				ATTR_FLAG_BASIC,
+			      .attr_update = amdgpu_pm_metrics_attr_update),
 };
 
 static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
@@ -2128,7 +2168,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
 		if (amdgpu_dpm_is_overdrive_supported(adev))
 			*states = ATTR_STATE_SUPPORTED;
 	} else if (DEVICE_ATTR_IS(mem_busy_percent)) {
-		if (adev->flags & AMD_IS_APU || gc_ver == IP_VERSION(9, 0, 1))
+		if ((adev->flags & AMD_IS_APU &&
+		     gc_ver != IP_VERSION(9, 4, 3)) ||
+		    gc_ver == IP_VERSION(9, 0, 1))
 			*states = ATTR_STATE_UNSUPPORTED;
 	} else if (DEVICE_ATTR_IS(pcie_bw)) {
 		/* PCIe Perf counters won't work on APU nodes */
@@ -2198,10 +2240,10 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
 	} else if (DEVICE_ATTR_IS(xgmi_plpd_policy)) {
 		if (amdgpu_dpm_get_xgmi_plpd_mode(adev, NULL) == XGMI_PLPD_NONE)
 			*states = ATTR_STATE_UNSUPPORTED;
-	} else if (DEVICE_ATTR_IS(pp_dpm_mclk_od)) {
+	} else if (DEVICE_ATTR_IS(pp_mclk_od)) {
 		if (amdgpu_dpm_get_mclk_od(adev) == -EOPNOTSUPP)
 			*states = ATTR_STATE_UNSUPPORTED;
-	} else if (DEVICE_ATTR_IS(pp_dpm_sclk_od)) {
+	} else if (DEVICE_ATTR_IS(pp_sclk_od)) {
 		if (amdgpu_dpm_get_sclk_od(adev) == -EOPNOTSUPP)
 			*states = ATTR_STATE_UNSUPPORTED;
 	} else if (DEVICE_ATTR_IS(apu_thermal_cap)) {
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index feccd2a7120d..3047ffe7f244 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -415,6 +415,8 @@ int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev);
 int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
 			     enum pp_mp1_state mp1_state);
 
+int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en);
+
 int amdgpu_dpm_set_gfx_power_up_by_imu(struct amdgpu_device *adev);
 
 int amdgpu_dpm_baco_exit(struct amdgpu_device *adev);
@@ -443,6 +445,7 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev);
 void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable);
 void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable);
 void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable);
+void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable);
 int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version);
 int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable);
 int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size);
@@ -511,6 +514,18 @@ int amdgpu_dpm_get_power_profile_mode(struct amdgpu_device *adev,
 int amdgpu_dpm_set_power_profile_mode(struct amdgpu_device *adev,
 				      long *input, uint32_t size);
 int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table);
+
+/**
+ * @get_pm_metrics: Get one snapshot of power management metrics from PMFW. The
+ * sample is copied to pm_metrics buffer. It's expected to be allocated by the
+ * caller and size of the allocated buffer is passed. Max size expected for a
+ * metrics sample is 4096 bytes.
+ *
+ * Return: Actual size of the metrics sample
+ */
+ssize_t amdgpu_dpm_get_pm_metrics(struct amdgpu_device *adev, void *pm_metrics,
+				  size_t size);
+
 int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev,
 				    uint32_t *fan_mode);
 int amdgpu_dpm_set_fan_speed_pwm(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
index 5d28c951a319..5cb4725c773f 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
@@ -2735,10 +2735,8 @@ static int kv_parse_power_table(struct amdgpu_device *adev)
 		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
 			&non_clock_info_array->nonClockInfo[non_clock_array_index];
 		ps = kzalloc(sizeof(struct kv_ps), GFP_KERNEL);
-		if (ps == NULL) {
-			kfree(adev->pm.dpm.ps);
+		if (ps == NULL)
 			return -ENOMEM;
-		}
 		adev->pm.dpm.ps[i].ps_priv = ps;
 		k = 0;
 		idx = (u8 *)&power_state->v2.clockInfoIndex[0];
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
index 81fb4e5dd804..60377747bab4 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
@@ -272,10 +272,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(power_info->pplib4.usVddcDependencyOnSCLKOffset));
 			ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
 								 dep_table);
-			if (ret) {
-				amdgpu_free_extended_power_table(adev);
+			if (ret)
 				return ret;
-			}
 		}
 		if (power_info->pplib4.usVddciDependencyOnMCLKOffset) {
 			dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *)
@@ -283,10 +281,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(power_info->pplib4.usVddciDependencyOnMCLKOffset));
 			ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddci_dependency_on_mclk,
 								 dep_table);
-			if (ret) {
-				amdgpu_free_extended_power_table(adev);
+			if (ret)
 				return ret;
-			}
 		}
 		if (power_info->pplib4.usVddcDependencyOnMCLKOffset) {
 			dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *)
@@ -294,10 +290,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(power_info->pplib4.usVddcDependencyOnMCLKOffset));
 			ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddc_dependency_on_mclk,
 								 dep_table);
-			if (ret) {
-				amdgpu_free_extended_power_table(adev);
+			if (ret)
 				return ret;
-			}
 		}
 		if (power_info->pplib4.usMvddDependencyOnMCLKOffset) {
 			dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *)
@@ -305,10 +299,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(power_info->pplib4.usMvddDependencyOnMCLKOffset));
 			ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.mvdd_dependency_on_mclk,
 								 dep_table);
-			if (ret) {
-				amdgpu_free_extended_power_table(adev);
+			if (ret)
 				return ret;
-			}
 		}
 		if (power_info->pplib4.usMaxClockVoltageOnDCOffset) {
 			ATOM_PPLIB_Clock_Voltage_Limit_Table *clk_v =
@@ -339,10 +331,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				kcalloc(psl->ucNumEntries,
 					sizeof(struct amdgpu_phase_shedding_limits_entry),
 					GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries)
 				return -ENOMEM;
-			}
 
 			entry = &psl->entries[0];
 			for (i = 0; i < psl->ucNumEntries; i++) {
@@ -383,10 +373,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 			ATOM_PPLIB_CAC_Leakage_Record *entry;
 			u32 size = cac_table->ucNumEntries * sizeof(struct amdgpu_cac_leakage_table);
 			adev->pm.dpm.dyn_state.cac_leakage_table.entries = kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.cac_leakage_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.cac_leakage_table.entries)
 				return -ENOMEM;
-			}
 			entry = &cac_table->entries[0];
 			for (i = 0; i < cac_table->ucNumEntries; i++) {
 				if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_EVV) {
@@ -438,10 +426,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				sizeof(struct amdgpu_vce_clock_voltage_dependency_entry);
 			adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries =
 				kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count =
 				limits->numEntries;
 			entry = &limits->entries[0];
@@ -493,10 +479,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				sizeof(struct amdgpu_uvd_clock_voltage_dependency_entry);
 			adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries =
 				kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count =
 				limits->numEntries;
 			entry = &limits->entries[0];
@@ -525,10 +509,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				sizeof(struct amdgpu_clock_voltage_dependency_entry);
 			adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries =
 				kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count =
 				limits->numEntries;
 			entry = &limits->entries[0];
@@ -548,10 +530,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				 le16_to_cpu(ext_hdr->usPPMTableOffset));
 			adev->pm.dpm.dyn_state.ppm_table =
 				kzalloc(sizeof(struct amdgpu_ppm_table), GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.ppm_table) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.ppm_table)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.ppm_table->ppm_design = ppm->ucPpmDesign;
 			adev->pm.dpm.dyn_state.ppm_table->cpu_core_number =
 				le16_to_cpu(ppm->usCpuCoreNumber);
@@ -583,10 +563,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				sizeof(struct amdgpu_clock_voltage_dependency_entry);
 			adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries =
 				kzalloc(size, GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries)
 				return -ENOMEM;
-			}
 			adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count =
 				limits->numEntries;
 			entry = &limits->entries[0];
@@ -606,10 +584,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 			ATOM_PowerTune_Table *pt;
 			adev->pm.dpm.dyn_state.cac_tdp_table =
 				kzalloc(sizeof(struct amdgpu_cac_tdp_table), GFP_KERNEL);
-			if (!adev->pm.dpm.dyn_state.cac_tdp_table) {
-				amdgpu_free_extended_power_table(adev);
+			if (!adev->pm.dpm.dyn_state.cac_tdp_table)
 				return -ENOMEM;
-			}
 			if (rev > 0) {
 				ATOM_PPLIB_POWERTUNE_Table_V1 *ppt = (ATOM_PPLIB_POWERTUNE_Table_V1 *)
 					(mode_info->atom_context->bios + data_offset +
@@ -645,10 +621,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 			ret = amdgpu_parse_clk_voltage_dep_table(
 					&adev->pm.dpm.dyn_state.vddgfx_dependency_on_sclk,
 					dep_table);
-			if (ret) {
-				kfree(adev->pm.dpm.dyn_state.vddgfx_dependency_on_sclk.entries);
+			if (ret)
 				return ret;
-			}
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index fc8e4ac6c8e7..df4f20293c16 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -7379,10 +7379,9 @@ static int si_dpm_init(struct amdgpu_device *adev)
 		kcalloc(4,
 			sizeof(struct amdgpu_clock_voltage_dependency_entry),
 			GFP_KERNEL);
-	if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) {
-		amdgpu_free_extended_power_table(adev);
+	if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries)
 		return -ENOMEM;
-	}
+
 	adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count = 4;
 	adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].clk = 0;
 	adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].v = 0;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index 914c15387157..aed0e2cefbf9 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -1371,21 +1371,18 @@ static int pp_set_active_display_count(void *handle, uint32_t count)
 	return phm_set_active_display_count(hwmgr, count);
 }
 
-static int pp_get_asic_baco_capability(void *handle, bool *cap)
+static bool pp_get_asic_baco_capability(void *handle)
 {
 	struct pp_hwmgr *hwmgr = handle;
 
-	*cap = false;
 	if (!hwmgr)
-		return -EINVAL;
+		return false;
 
 	if (!(hwmgr->not_vf && amdgpu_dpm) ||
 		!hwmgr->hwmgr_func->get_asic_baco_capability)
-		return 0;
+		return false;
 
-	hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr, cap);
-
-	return 0;
+	return hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr);
 }
 
 static int pp_get_asic_baco_state(void *handle, int *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
index 044cda005aed..e8a9471c1898 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c
@@ -33,21 +33,20 @@
 #include "smu/smu_7_1_2_d.h"
 #include "smu/smu_7_1_2_sh_mask.h"
 
-int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
+bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
 	uint32_t reg;
 
-	*cap = false;
 	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO))
 		return 0;
 
 	reg = RREG32(mmCC_BIF_BX_FUSESTRAP0);
 
 	if (reg & CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_MASK)
-		*cap = true;
+		return true;
 
-	return 0;
+	return false;
 }
 
 int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h
index be0d98abb536..73a773f4ce2e 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h
@@ -25,7 +25,7 @@
 #include "hwmgr.h"
 #include "common_baco.h"
 
-extern int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap);
+extern bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr);
 extern int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 extern int smu7_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
 
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index 11372fcc59c8..b1a8799e2dee 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -2974,6 +2974,8 @@ static int smu7_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		result = smu7_get_evv_voltages(hwmgr);
 		if (result) {
 			pr_info("Get EVV Voltage Failed.  Abort Driver loading!\n");
+			kfree(hwmgr->backend);
+			hwmgr->backend = NULL;
 			return -EINVAL;
 		}
 	} else {
@@ -3019,8 +3021,10 @@ static int smu7_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	}
 
 	result = smu7_update_edc_leakage_table(hwmgr);
-	if (result)
+	if (result) {
+		smu7_hwmgr_backend_fini(hwmgr);
 		return result;
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c
index de0a37f7c632..c66ef9741535 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c
@@ -28,14 +28,13 @@
 #include "vega10_inc.h"
 #include "smu9_baco.h"
 
-int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
+bool smu9_baco_get_capability(struct pp_hwmgr *hwmgr)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
 	uint32_t reg, data;
 
-	*cap = false;
 	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO))
-		return 0;
+		return false;
 
 	WREG32(0x12074, 0xFFF0003B);
 	data = RREG32(0x12075);
@@ -44,10 +43,10 @@ int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
 		reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0);
 
 		if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK)
-			*cap = true;
+			return true;
 	}
 
-	return 0;
+	return false;
 }
 
 int smu9_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h
index 84e90f801ac3..9ff7c2ea1b58 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h
@@ -25,7 +25,7 @@
 #include "hwmgr.h"
 #include "common_baco.h"
 
-extern int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap);
+extern bool smu9_baco_get_capability(struct pp_hwmgr *hwmgr);
 extern int smu9_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 
 #endif
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
index 994c0d374bfa..dad4c80aee58 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
@@ -36,23 +36,22 @@ static const struct soc15_baco_cmd_entry clean_baco_tbl[] = {
 	{CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_7), 0, 0, 0, 0},
 };
 
-int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap)
+bool vega20_baco_get_capability(struct pp_hwmgr *hwmgr)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
 	uint32_t reg;
 
-	*cap = false;
 	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO))
-		return 0;
+		return false;
 
 	if (((RREG32(0x17569) & 0x20000000) >> 29) == 0x1) {
 		reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0);
 
 		if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK)
-			*cap = true;
+			return true;
 	}
 
-	return 0;
+	return false;
 }
 
 int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
index f06471e712dc..bdad9c915631 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
@@ -25,7 +25,7 @@
 #include "hwmgr.h"
 #include "common_baco.h"
 
-extern int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap);
+extern bool vega20_baco_get_capability(struct pp_hwmgr *hwmgr);
 extern int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 extern int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
 extern int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr);
diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
index 81650727a5de..6f536159df4d 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
@@ -351,7 +351,7 @@ struct pp_hwmgr_func {
 	int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
 	int (*set_hard_min_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
 	int (*set_soft_max_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
-	int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool *cap);
+	bool (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr);
 	int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 	int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
 	int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char *buf);
diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c
index 9e4228232f02..ad1fd3150d03 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c
@@ -2298,6 +2298,7 @@ static uint32_t ci_get_mac_definition(uint32_t value)
 	case SMU_MAX_ENTRIES_SMIO:
 		return SMU7_MAX_ENTRIES_SMIO;
 	case SMU_MAX_LEVELS_VDDC:
+	case SMU_MAX_LEVELS_VDDGFX:
 		return SMU7_MAX_LEVELS_VDDC;
 	case SMU_MAX_LEVELS_VDDCI:
 		return SMU7_MAX_LEVELS_VDDCI;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c
index 97d9802fe673..17d2f5bff4a7 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c
@@ -2263,6 +2263,7 @@ static uint32_t iceland_get_mac_definition(uint32_t value)
 	case SMU_MAX_ENTRIES_SMIO:
 		return SMU71_MAX_ENTRIES_SMIO;
 	case SMU_MAX_LEVELS_VDDC:
+	case SMU_MAX_LEVELS_VDDGFX:
 		return SMU71_MAX_LEVELS_VDDC;
 	case SMU_MAX_LEVELS_VDDCI:
 		return SMU71_MAX_LEVELS_VDDCI;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 1ead323f1c78..c16703868e5c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1322,6 +1322,187 @@ static int smu_get_thermal_temperature_range(struct smu_context *smu)
 	return ret;
 }
 
+/**
+ * smu_wbrf_handle_exclusion_ranges - consume the wbrf exclusion ranges
+ *
+ * @smu: smu_context pointer
+ *
+ * Retrieve the wbrf exclusion ranges and send them to PMFW for proper handling.
+ * Returns 0 on success, error on failure.
+ */
+static int smu_wbrf_handle_exclusion_ranges(struct smu_context *smu)
+{
+	struct wbrf_ranges_in_out wbrf_exclusion = {0};
+	struct freq_band_range *wifi_bands = wbrf_exclusion.band_list;
+	struct amdgpu_device *adev = smu->adev;
+	uint32_t num_of_wbrf_ranges = MAX_NUM_OF_WBRF_RANGES;
+	uint64_t start, end;
+	int ret, i, j;
+
+	ret = amd_wbrf_retrieve_freq_band(adev->dev, &wbrf_exclusion);
+	if (ret) {
+		dev_err(adev->dev, "Failed to retrieve exclusion ranges!\n");
+		return ret;
+	}
+
+	/*
+	 * The exclusion ranges array we got might be filled with holes and duplicate
+	 * entries. For example:
+	 * {(2400, 2500), (0, 0), (6882, 6962), (2400, 2500), (0, 0), (6117, 6189), (0, 0)...}
+	 * We need to do some sortups to eliminate those holes and duplicate entries.
+	 * Expected output: {(2400, 2500), (6117, 6189), (6882, 6962), (0, 0)...}
+	 */
+	for (i = 0; i < num_of_wbrf_ranges; i++) {
+		start = wifi_bands[i].start;
+		end = wifi_bands[i].end;
+
+		/* get the last valid entry to fill the intermediate hole */
+		if (!start && !end) {
+			for (j = num_of_wbrf_ranges - 1; j > i; j--)
+				if (wifi_bands[j].start && wifi_bands[j].end)
+					break;
+
+			/* no valid entry left */
+			if (j <= i)
+				break;
+
+			start = wifi_bands[i].start = wifi_bands[j].start;
+			end = wifi_bands[i].end = wifi_bands[j].end;
+			wifi_bands[j].start = 0;
+			wifi_bands[j].end = 0;
+			num_of_wbrf_ranges = j;
+		}
+
+		/* eliminate duplicate entries */
+		for (j = i + 1; j < num_of_wbrf_ranges; j++) {
+			if ((wifi_bands[j].start == start) && (wifi_bands[j].end == end)) {
+				wifi_bands[j].start = 0;
+				wifi_bands[j].end = 0;
+			}
+		}
+	}
+
+	/* Send the sorted wifi_bands to PMFW */
+	ret = smu_set_wbrf_exclusion_ranges(smu, wifi_bands);
+	/* Try to set the wifi_bands again */
+	if (unlikely(ret == -EBUSY)) {
+		mdelay(5);
+		ret = smu_set_wbrf_exclusion_ranges(smu, wifi_bands);
+	}
+
+	return ret;
+}
+
+/**
+ * smu_wbrf_event_handler - handle notify events
+ *
+ * @nb: notifier block
+ * @action: event type
+ * @_arg: event data
+ *
+ * Calls relevant amdgpu function in response to wbrf event
+ * notification from kernel.
+ */
+static int smu_wbrf_event_handler(struct notifier_block *nb,
+				  unsigned long action, void *_arg)
+{
+	struct smu_context *smu = container_of(nb, struct smu_context, wbrf_notifier);
+
+	switch (action) {
+	case WBRF_CHANGED:
+		schedule_delayed_work(&smu->wbrf_delayed_work,
+				      msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE));
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+/**
+ * smu_wbrf_delayed_work_handler - callback on delayed work timer expired
+ *
+ * @work: struct work_struct pointer
+ *
+ * Flood is over and driver will consume the latest exclusion ranges.
+ */
+static void smu_wbrf_delayed_work_handler(struct work_struct *work)
+{
+	struct smu_context *smu = container_of(work, struct smu_context, wbrf_delayed_work.work);
+
+	smu_wbrf_handle_exclusion_ranges(smu);
+}
+
+/**
+ * smu_wbrf_support_check - check wbrf support
+ *
+ * @smu: smu_context pointer
+ *
+ * Verifies the ACPI interface whether wbrf is supported.
+ */
+static void smu_wbrf_support_check(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+
+	smu->wbrf_supported = smu_is_asic_wbrf_supported(smu) && amdgpu_wbrf &&
+							acpi_amd_wbrf_supported_consumer(adev->dev);
+
+	if (smu->wbrf_supported)
+		dev_info(adev->dev, "RF interference mitigation is supported\n");
+}
+
+/**
+ * smu_wbrf_init - init driver wbrf support
+ *
+ * @smu: smu_context pointer
+ *
+ * Verifies the AMD ACPI interfaces and registers with the wbrf
+ * notifier chain if wbrf feature is supported.
+ * Returns 0 on success, error on failure.
+ */
+static int smu_wbrf_init(struct smu_context *smu)
+{
+	int ret;
+
+	if (!smu->wbrf_supported)
+		return 0;
+
+	INIT_DELAYED_WORK(&smu->wbrf_delayed_work, smu_wbrf_delayed_work_handler);
+
+	smu->wbrf_notifier.notifier_call = smu_wbrf_event_handler;
+	ret = amd_wbrf_register_notifier(&smu->wbrf_notifier);
+	if (ret)
+		return ret;
+
+	/*
+	 * Some wifiband exclusion ranges may be already there
+	 * before our driver loaded. To make sure our driver
+	 * is awared of those exclusion ranges.
+	 */
+	schedule_delayed_work(&smu->wbrf_delayed_work,
+			      msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE));
+
+	return 0;
+}
+
+/**
+ * smu_wbrf_fini - tear down driver wbrf support
+ *
+ * @smu: smu_context pointer
+ *
+ * Unregisters with the wbrf notifier chain.
+ */
+static void smu_wbrf_fini(struct smu_context *smu)
+{
+	if (!smu->wbrf_supported)
+		return;
+
+	amd_wbrf_unregister_notifier(&smu->wbrf_notifier);
+
+	cancel_delayed_work_sync(&smu->wbrf_delayed_work);
+}
+
 static int smu_smc_hw_setup(struct smu_context *smu)
 {
 	struct smu_feature *feature = &smu->smu_feature;
@@ -1414,6 +1595,15 @@ static int smu_smc_hw_setup(struct smu_context *smu)
 	if (ret)
 		return ret;
 
+	/* Enable UclkShadow on wbrf supported */
+	if (smu->wbrf_supported) {
+		ret = smu_enable_uclk_shadow(smu, true);
+		if (ret) {
+			dev_err(adev->dev, "Failed to enable UclkShadow feature to support wbrf!\n");
+			return ret;
+		}
+	}
+
 	/*
 	 * With SCPM enabled, these actions(and relevant messages) are
 	 * not needed and permitted.
@@ -1512,6 +1702,15 @@ static int smu_smc_hw_setup(struct smu_context *smu)
 	 */
 	ret = smu_set_min_dcef_deep_sleep(smu,
 					  smu->smu_table.boot_values.dcefclk / 100);
+	if (ret) {
+		dev_err(adev->dev, "Error setting min deepsleep dcefclk\n");
+		return ret;
+	}
+
+	/* Init wbrf support. Properly setup the notifier */
+	ret = smu_wbrf_init(smu);
+	if (ret)
+		dev_err(adev->dev, "Error during wbrf init call\n");
 
 	return ret;
 }
@@ -1567,6 +1766,13 @@ static int smu_hw_init(void *handle)
 		return ret;
 	}
 
+	/*
+	 * Check whether wbrf is supported. This needs to be done
+	 * before SMU setup starts since part of SMU configuration
+	 * relies on this.
+	 */
+	smu_wbrf_support_check(smu);
+
 	if (smu->is_apu) {
 		ret = smu_set_gfx_imu_enable(smu);
 		if (ret)
@@ -1710,6 +1916,16 @@ static int smu_disable_dpms(struct smu_context *smu)
 		}
 	}
 
+	/* Notify SMU RLC is going to be off, stop RLC and SMU interaction.
+	 * otherwise SMU will hang while interacting with RLC if RLC is halted
+	 * this is a WA for Vangogh asic which fix the SMU hang issue.
+	 */
+	ret = smu_notify_rlc_state(smu, false);
+	if (ret) {
+		dev_err(adev->dev, "Fail to notify rlc status!\n");
+		return ret;
+	}
+
 	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2) &&
 	    !((adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs) &&
 	    !amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs->stop)
@@ -1723,6 +1939,8 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
 
+	smu_wbrf_fini(smu);
+
 	cancel_work_sync(&smu->throttling_logging_work);
 	cancel_work_sync(&smu->interrupt_work);
 
@@ -3005,19 +3223,17 @@ static int smu_set_xgmi_pstate(void *handle,
 	return ret;
 }
 
-static int smu_get_baco_capability(void *handle, bool *cap)
+static bool smu_get_baco_capability(void *handle)
 {
 	struct smu_context *smu = handle;
 
-	*cap = false;
-
 	if (!smu->pm_enabled)
-		return 0;
+		return false;
 
-	if (smu->ppt_funcs && smu->ppt_funcs->baco_is_support)
-		*cap = smu->ppt_funcs->baco_is_support(smu);
+	if (!smu->ppt_funcs || !smu->ppt_funcs->baco_is_support)
+		return false;
 
-	return 0;
+	return smu->ppt_funcs->baco_is_support(smu);
 }
 
 static int smu_baco_set_state(void *handle, int state)
@@ -3191,6 +3407,20 @@ static ssize_t smu_sys_get_gpu_metrics(void *handle, void **table)
 	return smu->ppt_funcs->get_gpu_metrics(smu, table);
 }
 
+static ssize_t smu_sys_get_pm_metrics(void *handle, void *pm_metrics,
+				      size_t size)
+{
+	struct smu_context *smu = handle;
+
+	if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+		return -EOPNOTSUPP;
+
+	if (!smu->ppt_funcs->get_pm_metrics)
+		return -EOPNOTSUPP;
+
+	return smu->ppt_funcs->get_pm_metrics(smu, pm_metrics, size);
+}
+
 static int smu_enable_mgpu_fan_boost(void *handle)
 {
 	struct smu_context *smu = handle;
@@ -3332,6 +3562,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
 	.set_df_cstate                    = smu_set_df_cstate,
 	.set_xgmi_pstate                  = smu_set_xgmi_pstate,
 	.get_gpu_metrics                  = smu_sys_get_gpu_metrics,
+	.get_pm_metrics                   = smu_sys_get_pm_metrics,
 	.set_watermarks_for_clock_ranges     = smu_set_watermarks_for_clock_ranges,
 	.display_disable_memory_clock_switch = smu_display_disable_memory_clock_switch,
 	.get_max_sustainable_clocks_by_dc    = smu_get_max_sustainable_clocks_by_dc,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 8def291b18bc..2aa4fea87314 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -22,6 +22,9 @@
 #ifndef __AMDGPU_SMU_H__
 #define __AMDGPU_SMU_H__
 
+#include <linux/acpi_amd_wbrf.h>
+#include <linux/units.h>
+
 #include "amdgpu.h"
 #include "kgd_pp_interface.h"
 #include "dm_pp_interface.h"
@@ -253,6 +256,7 @@ struct smu_table {
 	uint64_t mc_address;
 	void *cpu_addr;
 	struct amdgpu_bo *bo;
+	uint32_t version;
 };
 
 enum smu_perf_level_designation {
@@ -317,6 +321,7 @@ enum smu_table_id {
 	SMU_TABLE_PACE,
 	SMU_TABLE_ECCINFO,
 	SMU_TABLE_COMBO_PPTABLE,
+	SMU_TABLE_WIFIBAND,
 	SMU_TABLE_COUNT,
 };
 
@@ -470,6 +475,12 @@ struct stb_context {
 
 #define WORKLOAD_POLICY_MAX 7
 
+/*
+ * Configure wbrf event handling pace as there can be only one
+ * event processed every SMU_WBRF_EVENT_HANDLING_PACE ms.
+ */
+#define SMU_WBRF_EVENT_HANDLING_PACE	10
+
 struct smu_context {
 	struct amdgpu_device            *adev;
 	struct amdgpu_irq_src		irq_source;
@@ -569,6 +580,11 @@ struct smu_context {
 	struct delayed_work		swctf_delayed_work;
 
 	enum pp_xgmi_plpd_mode plpd_mode;
+
+	/* data structures for wbrf feature support */
+	bool				wbrf_supported;
+	struct notifier_block		wbrf_notifier;
+	struct delayed_work		wbrf_delayed_work;
 };
 
 struct i2c_adapter;
@@ -1253,6 +1269,15 @@ struct pptable_funcs {
 	ssize_t (*get_gpu_metrics)(struct smu_context *smu, void **table);
 
 	/**
+	 * @get_pm_metrics: Get one snapshot of power management metrics from
+	 * PMFW.
+	 *
+	 * Return: Size of the metrics sample
+	 */
+	ssize_t (*get_pm_metrics)(struct smu_context *smu, void *pm_metrics,
+				  size_t size);
+
+	/**
 	 * @enable_mgpu_fan_boost: Enable multi-GPU fan boost.
 	 */
 	int (*enable_mgpu_fan_boost)(struct smu_context *smu);
@@ -1360,6 +1385,27 @@ struct pptable_funcs {
 	 *                       management.
 	 */
 	int (*dpm_set_umsch_mm_enable)(struct smu_context *smu, bool enable);
+
+	/**
+	 * @notify_rlc_state: Notify RLC power state to SMU.
+	 */
+	int (*notify_rlc_state)(struct smu_context *smu, bool en);
+
+	/**
+	 * @is_asic_wbrf_supported: check whether PMFW supports the wbrf feature
+	 */
+	bool (*is_asic_wbrf_supported)(struct smu_context *smu);
+
+	/**
+	 * @enable_uclk_shadow: Enable the uclk shadow feature on wbrf supported
+	 */
+	int (*enable_uclk_shadow)(struct smu_context *smu, bool enable);
+
+	/**
+	 * @set_wbrf_exclusion_ranges: notify SMU the wifi bands occupied
+	 */
+	int (*set_wbrf_exclusion_ranges)(struct smu_context *smu,
+					struct freq_band_range *exclusion_ranges);
 };
 
 typedef enum {
@@ -1403,6 +1449,16 @@ typedef enum {
 	METRICS_PCIE_WIDTH,
 	METRICS_CURR_FANPWM,
 	METRICS_CURR_SOCKETPOWER,
+	METRICS_AVERAGE_VPECLK,
+	METRICS_AVERAGE_IPUCLK,
+	METRICS_AVERAGE_MPIPUCLK,
+	METRICS_THROTTLER_RESIDENCY_PROCHOT,
+	METRICS_THROTTLER_RESIDENCY_SPL,
+	METRICS_THROTTLER_RESIDENCY_FPPT,
+	METRICS_THROTTLER_RESIDENCY_SPPT,
+	METRICS_THROTTLER_RESIDENCY_THM_CORE,
+	METRICS_THROTTLER_RESIDENCY_THM_GFX,
+	METRICS_THROTTLER_RESIDENCY_THM_SOC,
 } MetricsMember_t;
 
 enum smu_cmn2asic_mapping_type {
@@ -1476,6 +1532,17 @@ enum smu_baco_seq {
 			 __dst_size);					   \
 })
 
+typedef struct {
+	uint16_t     LowFreq;
+	uint16_t     HighFreq;
+} WifiOneBand_t;
+
+typedef struct {
+	uint32_t		WifiBandEntryNum;
+	WifiOneBand_t	WifiBandEntry[11];
+	uint32_t		MmHubPadding[8];
+} WifiBandEntryTable_t;
+
 #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4)
 int smu_get_power_limit(void *handle,
 			uint32_t *limit,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index 9dd1ed5b8940..b114d14fc053 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -1615,7 +1615,8 @@ typedef struct {
 #define TABLE_I2C_COMMANDS            9
 #define TABLE_DRIVER_INFO             10
 #define TABLE_ECCINFO                 11
-#define TABLE_COUNT                   12
+#define TABLE_WIFIBAND                12
+#define TABLE_COUNT                   13
 
 //IH Interupt ID
 #define IH_INTERRUPT_ID_TO_DRIVER                   0xFE
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
index 62b7c0daff68..8b1496f8ce58 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
@@ -1605,7 +1605,8 @@ typedef struct {
 #define TABLE_I2C_COMMANDS            9
 #define TABLE_DRIVER_INFO             10
 #define TABLE_ECCINFO                 11
-#define TABLE_COUNT                   12
+#define TABLE_WIFIBAND                12
+#define TABLE_COUNT                   13
 
 //IH Interupt ID
 #define IH_INTERRUPT_ID_TO_DRIVER                   0xFE
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
index 22f88842a7fd..5bb7a63c0602 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
@@ -24,11 +24,6 @@
 #ifndef SMU14_DRIVER_IF_V14_0_0_H
 #define SMU14_DRIVER_IF_V14_0_0_H
 
-// *** IMPORTANT ***
-// SMU TEAM: Always increment the interface version if
-// any structure is changed in this file
-#define PMFW_DRIVER_IF_VERSION 6
-
 typedef struct {
   int32_t value;
   uint32_t numFractionalBits;
@@ -150,37 +145,50 @@ typedef struct {
 } DpmClocks_t;
 
 typedef struct {
-  uint16_t CoreFrequency[16];        //Target core frequency [MHz]
-  uint16_t CorePower[16];            //CAC calculated core power [mW]
-  uint16_t CoreTemperature[16];      //TSEN measured core temperature [centi-C]
-  uint16_t GfxTemperature;           //TSEN measured GFX temperature [centi-C]
-  uint16_t SocTemperature;           //TSEN measured SOC temperature [centi-C]
-  uint16_t StapmOpnLimit;            //Maximum IRM defined STAPM power limit [mW]
-  uint16_t StapmCurrentLimit;        //Time filtered STAPM power limit [mW]
-  uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz]
-  uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz]
-  uint16_t SkinTemp;                 //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C]
-  uint16_t GfxclkFrequency;          //Time filtered target GFXCLK frequency [MHz]
-  uint16_t FclkFrequency;            //Time filtered target FCLK frequency [MHz]
-  uint16_t GfxActivity;              //Time filtered GFX busy % [0-100]
-  uint16_t SocclkFrequency;          //Time filtered target SOCCLK frequency [MHz]
-  uint16_t VclkFrequency;            //Time filtered target VCLK frequency [MHz]
-  uint16_t VcnActivity;              //Time filtered VCN busy % [0-100]
-  uint16_t VpeclkFrequency;          //Time filtered target VPECLK frequency [MHz]
-  uint16_t IpuclkFrequency;          //Time filtered target IPUCLK frequency [MHz]
-  uint16_t IpuBusy[8];               //Time filtered IPU per-column busy % [0-100]
-  uint16_t DRAMReads;                //Time filtered DRAM read bandwidth [MB/sec]
-  uint16_t DRAMWrites;               //Time filtered DRAM write bandwidth [MB/sec]
-  uint16_t CoreC0Residency[16];      //Time filtered per-core C0 residency % [0-100]
-  uint16_t IpuPower;                 //Time filtered IPU power [mW]
-  uint32_t ApuPower;                 //Time filtered APU power [mW]
-  uint32_t GfxPower;                 //Time filtered GFX power [mW]
-  uint32_t dGpuPower;                //Time filtered dGPU power [mW]
-  uint32_t SocketPower;              //Time filtered power used for PPT/STAPM [APU+dGPU] [mW]
-  uint32_t AllCorePower;             //Time filtered sum of core power across all cores in the socket [mW]
-  uint32_t FilterAlphaValue;         //Metrics table alpha filter time constant [us]
-  uint32_t MetricsCounter;           //Counter that is incremented on every metrics table update [PM_TIMER cycles]
-  uint32_t spare[16];
+  uint16_t CoreFrequency[16];          //Target core frequency [MHz]
+  uint16_t CorePower[16];              //CAC calculated core power [mW]
+  uint16_t CoreTemperature[16];        //TSEN measured core temperature [centi-C]
+  uint16_t GfxTemperature;             //TSEN measured GFX temperature [centi-C]
+  uint16_t SocTemperature;             //TSEN measured SOC temperature [centi-C]
+  uint16_t StapmOpnLimit;              //Maximum IRM defined STAPM power limit [mW]
+  uint16_t StapmCurrentLimit;          //Time filtered STAPM power limit [mW]
+  uint16_t InfrastructureCpuMaxFreq;   //CCLK frequency limit enforced on classic cores [MHz]
+  uint16_t InfrastructureGfxMaxFreq;   //GFXCLK frequency limit enforced on GFX [MHz]
+  uint16_t SkinTemp;                   //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C]
+  uint16_t GfxclkFrequency;            //Time filtered target GFXCLK frequency [MHz]
+  uint16_t FclkFrequency;              //Time filtered target FCLK frequency [MHz]
+  uint16_t GfxActivity;                //Time filtered GFX busy % [0-100]
+  uint16_t SocclkFrequency;            //Time filtered target SOCCLK frequency [MHz]
+  uint16_t VclkFrequency;              //Time filtered target VCLK frequency [MHz]
+  uint16_t VcnActivity;                //Time filtered VCN busy % [0-100]
+  uint16_t VpeclkFrequency;            //Time filtered target VPECLK frequency [MHz]
+  uint16_t IpuclkFrequency;            //Time filtered target IPUCLK frequency [MHz]
+  uint16_t IpuBusy[8];                 //Time filtered IPU per-column busy % [0-100]
+  uint16_t DRAMReads;                  //Time filtered DRAM read bandwidth [MB/sec]
+  uint16_t DRAMWrites;                 //Time filtered DRAM write bandwidth [MB/sec]
+  uint16_t CoreC0Residency[16];        //Time filtered per-core C0 residency % [0-100]
+  uint16_t IpuPower;                   //Time filtered IPU power [mW]
+  uint32_t ApuPower;                   //Time filtered APU power [mW]
+  uint32_t GfxPower;                   //Time filtered GFX power [mW]
+  uint32_t dGpuPower;                  //Time filtered dGPU power [mW]
+  uint32_t SocketPower;                //Time filtered power used for PPT/STAPM [APU+dGPU] [mW]
+  uint32_t AllCorePower;               //Time filtered sum of core power across all cores in the socket [mW]
+  uint32_t FilterAlphaValue;           //Metrics table alpha filter time constant [us]
+  uint32_t MetricsCounter;             //Counter that is incremented on every metrics table update [PM_TIMER cycles]
+  uint16_t MemclkFrequency;            //Time filtered target MEMCLK frequency [MHz]
+  uint16_t MpipuclkFrequency;          //Time filtered target MPIPUCLK frequency [MHz]
+  uint16_t IpuReads;                   //Time filtered IPU read bandwidth [MB/sec]
+  uint16_t IpuWrites;                  //Time filtered IPU write bandwidth [MB/sec]
+  uint32_t ThrottleResidency_PROCHOT;  //Counter that is incremented on every metrics table update when PROCHOT was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_SPL;      //Counter that is incremented on every metrics table update when SPL was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_FPPT;     //Counter that is incremented on every metrics table update when fast PPT was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_SPPT;     //Counter that is incremented on every metrics table update when slow PPT was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_THM_CORE; //Counter that is incremented on every metrics table update when CORE thermal throttling was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_THM_GFX;  //Counter that is incremented on every metrics table update when GFX thermal throttling was engaged [PM_TIMER cycles]
+  uint32_t ThrottleResidency_THM_SOC;  //Counter that is incremented on every metrics table update when SOC thermal throttling was engaged [PM_TIMER cycles]
+  uint16_t Psys;                       //Time filtered Psys power [mW]
+  uint16_t spare1;
+  uint32_t spare[6];
 } SmuMetrics_t;
 
 //ISP tile definitions
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
index e2ee855c7748..e862d323caab 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
@@ -138,10 +138,9 @@
 #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
 #define PPSMC_MSG_SetPriorityDeltaGain           0x4B
 #define PPSMC_MSG_AllowIHHostInterrupt           0x4C
-
 #define PPSMC_MSG_DALNotPresent                  0x4E
-
-#define PPSMC_Message_Count                      0x4F
+#define PPSMC_MSG_EnableUCLKShadow               0x51
+#define PPSMC_Message_Count                      0x52
 
 //Debug Dump Message
 #define DEBUGSMC_MSG_TestMessage                    0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
index fef2d290f3f2..7b812b9994d7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
@@ -123,7 +123,7 @@ typedef enum {
   VOLTAGE_GUARDBAND_COUNT
 } GFX_GUARDBAND_e;
 
-#define SMU_METRICS_TABLE_VERSION 0x9
+#define SMU_METRICS_TABLE_VERSION 0xB
 
 typedef struct __attribute__((packed, aligned(4))) {
   uint32_t AccumulationCounter;
@@ -219,7 +219,103 @@ typedef struct __attribute__((packed, aligned(4))) {
   uint32_t PCIenReplayARolloverCountAcc;  // The Pcie counter itself is accumulated
   uint32_t PCIeNAKSentCountAcc;           // The Pcie counter itself is accumulated
   uint32_t PCIeNAKReceivedCountAcc;       // The Pcie counter itself is accumulated
-} MetricsTable_t;
+
+  // VCN/JPEG ACTIVITY
+  uint32_t VcnBusy[4];
+  uint32_t JpegBusy[32];
+} MetricsTableX_t;
+
+typedef struct __attribute__((packed, aligned(4))) {
+  uint32_t AccumulationCounter;
+
+  //TEMPERATURE
+  uint32_t MaxSocketTemperature;
+  uint32_t MaxVrTemperature;
+  uint32_t MaxHbmTemperature;
+  uint64_t MaxSocketTemperatureAcc;
+  uint64_t MaxVrTemperatureAcc;
+  uint64_t MaxHbmTemperatureAcc;
+
+  //POWER
+  uint32_t SocketPowerLimit;
+  uint32_t MaxSocketPowerLimit;
+  uint32_t SocketPower;
+
+  //ENERGY
+  uint64_t Timestamp;
+  uint64_t SocketEnergyAcc;
+  uint64_t CcdEnergyAcc;
+  uint64_t XcdEnergyAcc;
+  uint64_t AidEnergyAcc;
+  uint64_t HbmEnergyAcc;
+
+  //FREQUENCY
+  uint32_t CclkFrequencyLimit;
+  uint32_t GfxclkFrequencyLimit;
+  uint32_t FclkFrequency;
+  uint32_t UclkFrequency;
+  uint32_t SocclkFrequency[4];
+  uint32_t VclkFrequency[4];
+  uint32_t DclkFrequency[4];
+  uint32_t LclkFrequency[4];
+  uint64_t GfxclkFrequencyAcc[8];
+  uint64_t CclkFrequencyAcc[96];
+
+  //FREQUENCY RANGE
+  uint32_t MaxCclkFrequency;
+  uint32_t MinCclkFrequency;
+  uint32_t MaxGfxclkFrequency;
+  uint32_t MinGfxclkFrequency;
+  uint32_t FclkFrequencyTable[4];
+  uint32_t UclkFrequencyTable[4];
+  uint32_t SocclkFrequencyTable[4];
+  uint32_t VclkFrequencyTable[4];
+  uint32_t DclkFrequencyTable[4];
+  uint32_t LclkFrequencyTable[4];
+  uint32_t MaxLclkDpmRange;
+  uint32_t MinLclkDpmRange;
+
+  //XGMI
+  uint32_t XgmiWidth;
+  uint32_t XgmiBitrate;
+  uint64_t XgmiReadBandwidthAcc[8];
+  uint64_t XgmiWriteBandwidthAcc[8];
+
+  //ACTIVITY
+  uint32_t SocketC0Residency;
+  uint32_t SocketGfxBusy;
+  uint32_t DramBandwidthUtilization;
+  uint64_t SocketC0ResidencyAcc;
+  uint64_t SocketGfxBusyAcc;
+  uint64_t DramBandwidthAcc;
+  uint32_t MaxDramBandwidth;
+  uint64_t DramBandwidthUtilizationAcc;
+  uint64_t PcieBandwidthAcc[4];
+
+  //THROTTLERS
+  uint32_t ProchotResidencyAcc;
+  uint32_t PptResidencyAcc;
+  uint32_t SocketThmResidencyAcc;
+  uint32_t VrThmResidencyAcc;
+  uint32_t HbmThmResidencyAcc;
+  uint32_t GfxLockXCDMak;
+
+  // New Items at end to maintain driver compatibility
+  uint32_t GfxclkFrequency[8];
+
+  //PSNs
+  uint64_t PublicSerialNumber_AID[4];
+  uint64_t PublicSerialNumber_XCD[8];
+  uint64_t PublicSerialNumber_CCD[12];
+
+  //XGMI Data tranfser size
+  uint64_t XgmiReadDataSizeAcc[8];//in KByte
+  uint64_t XgmiWriteDataSizeAcc[8];//in KByte
+
+  // VCN/JPEG ACTIVITY
+  uint32_t VcnBusy[4];
+  uint32_t JpegBusy[32];
+} MetricsTableA_t;
 
 #define SMU_VF_METRICS_TABLE_VERSION 0x3
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
index 6aaefca9b595..a6bf9cdd130e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
@@ -134,6 +134,7 @@
 #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
 #define PPSMC_MSG_SetPriorityDeltaGain           0x4B
 #define PPSMC_MSG_AllowIHHostInterrupt           0x4C
-#define PPSMC_Message_Count                      0x4D
+#define PPSMC_MSG_EnableUCLKShadow               0x51
+#define PPSMC_Message_Count                      0x52
 
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 9dd47d91093e..953a767613b1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -259,7 +259,9 @@
 	__SMU_DUMMY_MAP(PowerUpUmsch),	\
 	__SMU_DUMMY_MAP(PowerDownUmsch),	\
 	__SMU_DUMMY_MAP(SetSoftMaxVpe),	\
-	__SMU_DUMMY_MAP(SetSoftMinVpe),
+	__SMU_DUMMY_MAP(SetSoftMinVpe), \
+	__SMU_DUMMY_MAP(GetMetricsVersion), \
+	__SMU_DUMMY_MAP(EnableUCLKShadow),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)	SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 95cb919718ae..fbd57fa1a004 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -210,15 +210,8 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu);
 int smu_v13_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
 					       struct pp_smu_nv_clock_table *max_clocks);
 
-int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
-				      enum smu_baco_seq baco_seq);
-
 bool smu_v13_0_baco_is_support(struct smu_context *smu);
 
-enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu);
-
-int smu_v13_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state);
-
 int smu_v13_0_baco_enter(struct smu_context *smu);
 int smu_v13_0_baco_exit(struct smu_context *smu);
 
@@ -301,5 +294,9 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
 
 int smu_v13_0_disable_pmfw_state(struct smu_context *smu);
 
+int smu_v13_0_enable_uclk_shadow(struct smu_context *smu, bool enable);
+
+int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu,
+						 struct freq_band_range *exclusion_ranges);
 #endif
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
index a5b569976f19..3f7463c1c1a9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
@@ -26,8 +26,8 @@
 #include "amdgpu_smu.h"
 
 #define SMU14_DRIVER_IF_VERSION_INV 0xFFFFFFFF
+#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x7
 #define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x1
-#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x6
 
 #define FEATURE_MASK(feature) (1ULL << feature)
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 2cb6b68222ba..4cd43bbec910 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -2407,8 +2407,6 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
 	.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme,
 	.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc,
 	.baco_is_support = smu_v11_0_baco_is_support,
-	.baco_get_state = smu_v11_0_baco_get_state,
-	.baco_set_state = smu_v11_0_baco_set_state,
 	.baco_enter = smu_v11_0_baco_enter,
 	.baco_exit = smu_v11_0_baco_exit,
 	.get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index a38233cc5b7f..8d1d29ffb0f1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -3537,8 +3537,6 @@ static const struct pptable_funcs navi10_ppt_funcs = {
 	.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme,
 	.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc,
 	.baco_is_support = smu_v11_0_baco_is_support,
-	.baco_get_state = smu_v11_0_baco_get_state,
-	.baco_set_state = smu_v11_0_baco_set_state,
 	.baco_enter = navi10_baco_enter,
 	.baco_exit = navi10_baco_exit,
 	.get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 1de9f8b5cc5f..21fc033528fa 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -4428,8 +4428,6 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
 	.set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme,
 	.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc,
 	.baco_is_support = smu_v11_0_baco_is_support,
-	.baco_get_state = smu_v11_0_baco_get_state,
-	.baco_set_state = smu_v11_0_baco_set_state,
 	.baco_enter = sienna_cichlid_baco_enter,
 	.baco_exit = sienna_cichlid_baco_exit,
 	.mode1_reset_is_support = sienna_cichlid_is_mode1_reset_supported,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 762b31455a0b..2ff6deedef95 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -2193,8 +2193,7 @@ static int vangogh_get_dpm_clock_table(struct smu_context *smu, struct dpm_clock
 	return 0;
 }
 
-
-static int vangogh_system_features_control(struct smu_context *smu, bool en)
+static int vangogh_notify_rlc_state(struct smu_context *smu, bool en)
 {
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
@@ -2523,7 +2522,7 @@ static const struct pptable_funcs vangogh_ppt_funcs = {
 	.print_clk_levels = vangogh_common_print_clk_levels,
 	.set_default_dpm_table = vangogh_set_default_dpm_tables,
 	.set_fine_grain_gfx_freq_parameters = vangogh_set_fine_grain_gfx_freq_parameters,
-	.system_features_control = vangogh_system_features_control,
+	.notify_rlc_state = vangogh_notify_rlc_state,
 	.feature_is_enabled = smu_cmn_feature_is_enabled,
 	.set_power_profile_mode = vangogh_set_power_profile_mode,
 	.get_power_profile_mode = vangogh_get_power_profile_mode,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index 1a6675d70a4b..f1440869d1ce 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -257,8 +257,11 @@ static int aldebaran_tables_init(struct smu_context *smu)
 	}
 
 	smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL);
-	if (!smu_table->ecc_table)
+	if (!smu_table->ecc_table) {
+		kfree(smu_table->metrics_table);
+		kfree(smu_table->gpu_metrics_table);
 		return -ENOMEM;
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index cf1b84060bc3..771a3d457c33 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2199,7 +2199,7 @@ int smu_v13_0_gfx_ulv_control(struct smu_context *smu,
 	return ret;
 }
 
-int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
+static int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
 				      enum smu_baco_seq baco_seq)
 {
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
@@ -2221,33 +2221,14 @@ int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
 	return 0;
 }
 
-bool smu_v13_0_baco_is_support(struct smu_context *smu)
-{
-	struct smu_baco_context *smu_baco = &smu->smu_baco;
-
-	if (amdgpu_sriov_vf(smu->adev) ||
-	    !smu_baco->platform_support)
-		return false;
-
-	/* return true if ASIC is in BACO state already */
-	if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER)
-		return true;
-
-	if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) &&
-	    !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))
-		return false;
-
-	return true;
-}
-
-enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu)
+static enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu)
 {
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
 
 	return smu_baco->state;
 }
 
-int smu_v13_0_baco_set_state(struct smu_context *smu,
+static int smu_v13_0_baco_set_state(struct smu_context *smu,
 			     enum smu_baco_state state)
 {
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
@@ -2281,24 +2262,60 @@ int smu_v13_0_baco_set_state(struct smu_context *smu,
 	return ret;
 }
 
-int smu_v13_0_baco_enter(struct smu_context *smu)
+bool smu_v13_0_baco_is_support(struct smu_context *smu)
 {
-	int ret = 0;
+	struct smu_baco_context *smu_baco = &smu->smu_baco;
 
-	ret = smu_v13_0_baco_set_state(smu,
-				       SMU_BACO_STATE_ENTER);
-	if (ret)
-		return ret;
+	if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support)
+		return false;
+
+	/* return true if ASIC is in BACO state already */
+	if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER)
+		return true;
 
-	msleep(10);
+	if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) &&
+	    !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))
+		return false;
 
-	return ret;
+	return true;
+}
+
+int smu_v13_0_baco_enter(struct smu_context *smu)
+{
+	struct smu_baco_context *smu_baco = &smu->smu_baco;
+	struct amdgpu_device *adev = smu->adev;
+	int ret;
+
+	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
+		return smu_v13_0_baco_set_armd3_sequence(smu,
+				(smu_baco->maco_support && amdgpu_runtime_pm != 1) ?
+					BACO_SEQ_BAMACO : BACO_SEQ_BACO);
+	} else {
+		ret = smu_v13_0_baco_set_state(smu, SMU_BACO_STATE_ENTER);
+		if (!ret)
+			usleep_range(10000, 11000);
+
+		return ret;
+	}
 }
 
 int smu_v13_0_baco_exit(struct smu_context *smu)
 {
-	return smu_v13_0_baco_set_state(smu,
-					SMU_BACO_STATE_EXIT);
+	struct amdgpu_device *adev = smu->adev;
+	int ret;
+
+	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
+		/* Wait for PMFW handling for the Dstate change */
+		usleep_range(10000, 11000);
+		ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
+	} else {
+		ret = smu_v13_0_baco_set_state(smu, SMU_BACO_STATE_EXIT);
+	}
+
+	if (!ret)
+		adev->gfx.is_poweron = false;
+
+	return ret;
 }
 
 int smu_v13_0_set_gfx_power_up_by_imu(struct smu_context *smu)
@@ -2490,3 +2507,51 @@ int smu_v13_0_disable_pmfw_state(struct smu_context *smu)
 
 	return ret == 0 ? 0 : -EINVAL;
 }
+
+int smu_v13_0_enable_uclk_shadow(struct smu_context *smu, bool enable)
+{
+	return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnableUCLKShadow, enable, NULL);
+}
+
+int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu,
+						 struct freq_band_range *exclusion_ranges)
+{
+	WifiBandEntryTable_t wifi_bands;
+	int valid_entries = 0;
+	int ret, i;
+
+	memset(&wifi_bands, 0, sizeof(wifi_bands));
+	for (i = 0; i < ARRAY_SIZE(wifi_bands.WifiBandEntry); i++) {
+		if (!exclusion_ranges[i].start && !exclusion_ranges[i].end)
+			break;
+
+		/* PMFW expects the inputs to be in Mhz unit */
+		wifi_bands.WifiBandEntry[valid_entries].LowFreq =
+			DIV_ROUND_DOWN_ULL(exclusion_ranges[i].start, HZ_PER_MHZ);
+		wifi_bands.WifiBandEntry[valid_entries++].HighFreq =
+			DIV_ROUND_UP_ULL(exclusion_ranges[i].end, HZ_PER_MHZ);
+	}
+	wifi_bands.WifiBandEntryNum = valid_entries;
+
+	/*
+	 * Per confirm with PMFW team, WifiBandEntryNum = 0
+	 * is a valid setting.
+	 *
+	 * Considering the scenarios below:
+	 * - At first the wifi device adds an exclusion range e.g. (2400,2500) to
+	 *   BIOS and our driver gets notified. We will set WifiBandEntryNum = 1
+	 *   and pass the WifiBandEntry (2400, 2500) to PMFW.
+	 *
+	 * - Later the wifi device removes the wifiband list added above and
+	 *   our driver gets notified again. At this time, driver will set
+	 *   WifiBandEntryNum = 0 and pass an empty WifiBandEntry list to PMFW.
+	 *
+	 * - PMFW may still need to do some uclk shadow update(e.g. switching
+	 *   from shadow clock back to primary clock) on receiving this.
+	 */
+	ret = smu_cmn_update_table(smu, SMU_TABLE_WIFIBAND, 0, &wifi_bands, true);
+	if (ret)
+		dev_warn(smu->adev->dev, "Failed to set wifiband!");
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 82c4e1f1c6f0..231122622a9c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -169,6 +169,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(AllowIHHostInterrupt,		PPSMC_MSG_AllowIHHostInterrupt,       0),
 	MSG_MAP(ReenableAcDcInterrupt,		PPSMC_MSG_ReenableAcDcInterrupt,       0),
 	MSG_MAP(DALNotPresent,		PPSMC_MSG_DALNotPresent,       0),
+	MSG_MAP(EnableUCLKShadow,		PPSMC_MSG_EnableUCLKShadow,            0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
@@ -253,6 +254,7 @@ static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
 	TAB_MAP(I2C_COMMANDS),
 	TAB_MAP(ECCINFO),
 	TAB_MAP(OVERDRIVE),
+	TAB_MAP(WIFIBAND),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {
@@ -498,6 +500,9 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
 			PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
 	SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t),
 			PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+	SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND,
+		       sizeof(WifiBandEntryTable_t), PAGE_SIZE,
+		       AMDGPU_GEM_DOMAIN_VRAM);
 
 	smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL);
 	if (!smu_table->metrics_table)
@@ -2540,16 +2545,19 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
 
 	workload_mask = 1 << workload_type;
 
-	/* Add optimizations for SMU13.0.0.  Reuse the power saving profile */
-	if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE &&
-	    (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0)) &&
-	    ((smu->adev->pm.fw_version == 0x004e6601) ||
-	     (smu->adev->pm.fw_version >= 0x004e7400))) {
-		workload_type = smu_cmn_to_asic_specific_index(smu,
-							       CMN2ASIC_MAPPING_WORKLOAD,
-							       PP_SMC_POWER_PROFILE_POWERSAVING);
-		if (workload_type >= 0)
-			workload_mask |= 1 << workload_type;
+	/* Add optimizations for SMU13.0.0/10.  Reuse the power saving profile */
+	if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) {
+		if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) &&
+			((smu->adev->pm.fw_version == 0x004e6601) ||
+			(smu->adev->pm.fw_version >= 0x004e7300))) ||
+			(amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+			 smu->adev->pm.fw_version >= 0x00504500)) {
+			workload_type = smu_cmn_to_asic_specific_index(smu,
+														   CMN2ASIC_MAPPING_WORKLOAD,
+														   PP_SMC_POWER_PROFILE_POWERSAVING);
+			if (workload_type >= 0)
+				workload_mask |= 1 << workload_type;
+		}
 	}
 
 	return smu_cmn_send_smc_msg_with_param(smu,
@@ -2558,38 +2566,6 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
 					       NULL);
 }
 
-static int smu_v13_0_0_baco_enter(struct smu_context *smu)
-{
-	struct smu_baco_context *smu_baco = &smu->smu_baco;
-	struct amdgpu_device *adev = smu->adev;
-
-	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev))
-		return smu_v13_0_baco_set_armd3_sequence(smu,
-				(smu_baco->maco_support && amdgpu_runtime_pm != 1) ?
-					BACO_SEQ_BAMACO : BACO_SEQ_BACO);
-	else
-		return smu_v13_0_baco_enter(smu);
-}
-
-static int smu_v13_0_0_baco_exit(struct smu_context *smu)
-{
-	struct amdgpu_device *adev = smu->adev;
-	int ret;
-
-	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
-		/* Wait for PMFW handling for the Dstate change */
-		usleep_range(10000, 11000);
-		ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
-	} else {
-		ret = smu_v13_0_baco_exit(smu);
-	}
-
-	if (!ret)
-		adev->gfx.is_poweron = false;
-
-	return ret;
-}
-
 static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -2970,6 +2946,20 @@ static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu,
 	return ret;
 }
 
+static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+
+	switch (adev->ip_versions[MP1_HWIP][0]) {
+	case IP_VERSION(13, 0, 0):
+		return smu->smc_fw_version >= 0x004e6300;
+	case IP_VERSION(13, 0, 10):
+		return smu->smc_fw_version >= 0x00503300;
+	default:
+		return false;
+	}
+}
+
 static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask,
 	.set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
@@ -3035,10 +3025,8 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.deep_sleep_control = smu_v13_0_deep_sleep_control,
 	.gfx_ulv_control = smu_v13_0_gfx_ulv_control,
 	.baco_is_support = smu_v13_0_baco_is_support,
-	.baco_get_state = smu_v13_0_baco_get_state,
-	.baco_set_state = smu_v13_0_baco_set_state,
-	.baco_enter = smu_v13_0_0_baco_enter,
-	.baco_exit = smu_v13_0_0_baco_exit,
+	.baco_enter = smu_v13_0_baco_enter,
+	.baco_exit = smu_v13_0_baco_exit,
 	.mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported,
 	.mode1_reset = smu_v13_0_0_mode1_reset,
 	.mode2_reset = smu_v13_0_0_mode2_reset,
@@ -3050,6 +3038,9 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.gpo_control = smu_v13_0_gpo_control,
 	.get_ecc_info = smu_v13_0_0_get_ecc_info,
 	.notify_display_change = smu_v13_0_notify_display_change,
+	.is_asic_wbrf_supported = smu_v13_0_0_wbrf_support_check,
+	.enable_uclk_shadow = smu_v13_0_enable_uclk_shadow,
+	.set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges,
 };
 
 void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 0e5a77c3c2e2..4ebc6b421c2c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -120,6 +120,7 @@ struct mca_ras_info {
 #define P2S_TABLE_ID_A 0x50325341
 #define P2S_TABLE_ID_X 0x50325358
 
+// clang-format off
 static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
 	MSG_MAP(TestMessage,			     PPSMC_MSG_TestMessage,			0),
 	MSG_MAP(GetSmuVersion,			     PPSMC_MSG_GetSmuVersion,			1),
@@ -128,6 +129,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
 	MSG_MAP(DisableAllSmuFeatures,		     PPSMC_MSG_DisableAllSmuFeatures,		0),
 	MSG_MAP(RequestI2cTransaction,		     PPSMC_MSG_RequestI2cTransaction,		0),
 	MSG_MAP(GetMetricsTable,		     PPSMC_MSG_GetMetricsTable,			1),
+	MSG_MAP(GetMetricsVersion,		     PPSMC_MSG_GetMetricsVersion,		1),
 	MSG_MAP(GetEnabledSmuFeaturesHigh,	     PPSMC_MSG_GetEnabledSmuFeaturesHigh,	1),
 	MSG_MAP(GetEnabledSmuFeaturesLow,	     PPSMC_MSG_GetEnabledSmuFeaturesLow,	1),
 	MSG_MAP(SetDriverDramAddrHigh,		     PPSMC_MSG_SetDriverDramAddrHigh,		1),
@@ -171,6 +173,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
 	MSG_MAP(SelectPLPDMode,                      PPSMC_MSG_SelectPLPDMode,                  0),
 };
 
+// clang-format on
 static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = {
 	CLK_MAP(SOCCLK, PPCLK_SOCCLK),
 	CLK_MAP(FCLK, PPCLK_FCLK),
@@ -245,6 +248,8 @@ struct PPTable_t {
 #define SMUQ10_TO_UINT(x) ((x) >> 10)
 #define SMUQ10_FRAC(x) ((x) & 0x3ff)
 #define SMUQ10_ROUND(x) ((SMUQ10_TO_UINT(x)) + ((SMUQ10_FRAC(x)) >= 0x200))
+#define GET_METRIC_FIELD(field) ((adev->flags & AMD_IS_APU) ?\
+		(metrics_a->field) : (metrics_x->field))
 
 struct smu_v13_0_6_dpm_map {
 	enum smu_clk_type clk_type;
@@ -327,7 +332,8 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
 		SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE,
 			       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
 
-	SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(MetricsTable_t),
+	SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS,
+		       max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)),
 		       PAGE_SIZE,
 		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
 
@@ -335,12 +341,13 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
 		       PAGE_SIZE,
 		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
 
-	smu_table->metrics_table = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
+	smu_table->metrics_table = kzalloc(max(sizeof(MetricsTableX_t),
+		       sizeof(MetricsTableA_t)), GFP_KERNEL);
 	if (!smu_table->metrics_table)
 		return -ENOMEM;
 	smu_table->metrics_time = 0;
 
-	smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_4);
+	smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_5);
 	smu_table->gpu_metrics_table =
 		kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
 	if (!smu_table->gpu_metrics_table) {
@@ -428,13 +435,51 @@ static int smu_v13_0_6_get_metrics_table(struct smu_context *smu,
 	return 0;
 }
 
+static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu,
+					  void *metrics, size_t max_size)
+{
+	struct smu_table_context *smu_tbl_ctxt = &smu->smu_table;
+	uint32_t table_version = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].version;
+	uint32_t table_size = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].size;
+	struct amdgpu_pm_metrics *pm_metrics = metrics;
+	uint32_t pmfw_version;
+	int ret;
+
+	if (!pm_metrics || !max_size)
+		return -EINVAL;
+
+	if (max_size < (table_size + sizeof(pm_metrics->common_header)))
+		return -EOVERFLOW;
+
+	/* Don't use cached metrics data */
+	ret = smu_v13_0_6_get_metrics_table(smu, pm_metrics->data, true);
+	if (ret)
+		return ret;
+
+	smu_cmn_get_smc_version(smu, NULL, &pmfw_version);
+
+	memset(&pm_metrics->common_header, 0,
+	       sizeof(pm_metrics->common_header));
+	pm_metrics->common_header.mp1_ip_discovery_version =
+		IP_VERSION(13, 0, 6);
+	pm_metrics->common_header.pmfw_version = pmfw_version;
+	pm_metrics->common_header.pmmetrics_version = table_version;
+	pm_metrics->common_header.structure_size =
+		sizeof(pm_metrics->common_header) + table_size;
+
+	return pm_metrics->common_header.structure_size;
+}
+
 static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
-	MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table;
+	MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table;
+	MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table;
 	struct PPTable_t *pptable =
 		(struct PPTable_t *)smu_table->driver_pptable;
+	struct amdgpu_device *adev = smu->adev;
 	int ret, i, retry = 100;
+	uint32_t table_version;
 
 	/* Store one-time values in driver PPTable */
 	if (!pptable->Init) {
@@ -444,7 +489,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
 				return ret;
 
 			/* Ensure that metrics have been updated */
-			if (metrics->AccumulationCounter)
+			if (GET_METRIC_FIELD(AccumulationCounter))
 				break;
 
 			usleep_range(1000, 1100);
@@ -453,30 +498,37 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
 		if (!retry)
 			return -ETIME;
 
+		ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsVersion,
+					   &table_version);
+		if (ret)
+			return ret;
+		smu_table->tables[SMU_TABLE_SMU_METRICS].version =
+			table_version;
+
 		pptable->MaxSocketPowerLimit =
-			SMUQ10_ROUND(metrics->MaxSocketPowerLimit);
+			SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit));
 		pptable->MaxGfxclkFrequency =
-			SMUQ10_ROUND(metrics->MaxGfxclkFrequency);
+			SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency));
 		pptable->MinGfxclkFrequency =
-			SMUQ10_ROUND(metrics->MinGfxclkFrequency);
+			SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency));
 
 		for (i = 0; i < 4; ++i) {
 			pptable->FclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->FclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable)[i]);
 			pptable->UclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->UclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable)[i]);
 			pptable->SocclkFrequencyTable[i] = SMUQ10_ROUND(
-				metrics->SocclkFrequencyTable[i]);
+				GET_METRIC_FIELD(SocclkFrequencyTable)[i]);
 			pptable->VclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->VclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable)[i]);
 			pptable->DclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->DclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable)[i]);
 			pptable->LclkFrequencyTable[i] =
-				SMUQ10_ROUND(metrics->LclkFrequencyTable[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable)[i]);
 		}
 
 		/* use AID0 serial number by default */
-		pptable->PublicSerialNumber_AID = metrics->PublicSerialNumber_AID[0];
+		pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID)[0];
 
 		pptable->Init = true;
 	}
@@ -778,7 +830,8 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu,
 					    uint32_t *value)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
-	MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table;
+	MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table;
+	MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table;
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
 	int xcc_id;
@@ -793,50 +846,50 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu,
 	case METRICS_AVERAGE_GFXCLK:
 		if (smu->smc_fw_version >= 0x552F00) {
 			xcc_id = GET_INST(GC, 0);
-			*value = SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]);
+			*value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]);
 		} else {
 			*value = 0;
 		}
 		break;
 	case METRICS_CURR_SOCCLK:
 	case METRICS_AVERAGE_SOCCLK:
-		*value = SMUQ10_ROUND(metrics->SocclkFrequency[0]);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[0]);
 		break;
 	case METRICS_CURR_UCLK:
 	case METRICS_AVERAGE_UCLK:
-		*value = SMUQ10_ROUND(metrics->UclkFrequency);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency));
 		break;
 	case METRICS_CURR_VCLK:
-		*value = SMUQ10_ROUND(metrics->VclkFrequency[0]);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[0]);
 		break;
 	case METRICS_CURR_DCLK:
-		*value = SMUQ10_ROUND(metrics->DclkFrequency[0]);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[0]);
 		break;
 	case METRICS_CURR_FCLK:
-		*value = SMUQ10_ROUND(metrics->FclkFrequency);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency));
 		break;
 	case METRICS_AVERAGE_GFXACTIVITY:
-		*value = SMUQ10_ROUND(metrics->SocketGfxBusy);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy));
 		break;
 	case METRICS_AVERAGE_MEMACTIVITY:
-		*value = SMUQ10_ROUND(metrics->DramBandwidthUtilization);
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization));
 		break;
 	case METRICS_CURR_SOCKETPOWER:
-		*value = SMUQ10_ROUND(metrics->SocketPower) << 8;
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)) << 8;
 		break;
 	case METRICS_TEMPERATURE_HOTSPOT:
-		*value = SMUQ10_ROUND(metrics->MaxSocketTemperature) *
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)) *
 			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
 		break;
 	case METRICS_TEMPERATURE_MEM:
-		*value = SMUQ10_ROUND(metrics->MaxHbmTemperature) *
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)) *
 			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
 		break;
 	/* This is the max of all VRs and not just SOC VR.
 	 * No need to define another data type for the same.
 	 */
 	case METRICS_TEMPERATURE_VRSOC:
-		*value = SMUQ10_ROUND(metrics->MaxVrTemperature) *
+		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)) *
 			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
 		break;
 	default:
@@ -1470,7 +1523,6 @@ static int smu_v13_0_6_mca_set_debug_mode(struct smu_context *smu, bool enable)
 	if (smu->smc_fw_version < 0x554800)
 		return 0;
 
-	amdgpu_ras_set_mca_debug_mode(smu->adev, enable);
 	return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ClearMcaOnRead,
 					       enable ? 0 : ClearMcaOnRead_UE_FLAG_MASK | ClearMcaOnRead_CE_POLL_MASK,
 					       NULL);
@@ -2022,67 +2074,70 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu)
 static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
-	struct gpu_metrics_v1_4 *gpu_metrics =
-		(struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table;
+	struct gpu_metrics_v1_5 *gpu_metrics =
+		(struct gpu_metrics_v1_5 *)smu_table->gpu_metrics_table;
 	struct amdgpu_device *adev = smu->adev;
-	int ret = 0, xcc_id, inst, i;
-	MetricsTable_t *metrics;
+	int ret = 0, xcc_id, inst, i, j;
+	MetricsTableX_t *metrics_x;
+	MetricsTableA_t *metrics_a;
 	u16 link_width_level;
 
-	metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
-	ret = smu_v13_0_6_get_metrics_table(smu, metrics, true);
+	metrics_x = kzalloc(max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), GFP_KERNEL);
+	ret = smu_v13_0_6_get_metrics_table(smu, metrics_x, true);
 	if (ret) {
-		kfree(metrics);
+		kfree(metrics_x);
 		return ret;
 	}
 
-	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4);
+	metrics_a = (MetricsTableA_t *)metrics_x;
+
+	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 5);
 
 	gpu_metrics->temperature_hotspot =
-		SMUQ10_ROUND(metrics->MaxSocketTemperature);
+		SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature));
 	/* Individual HBM stack temperature is not reported */
 	gpu_metrics->temperature_mem =
-		SMUQ10_ROUND(metrics->MaxHbmTemperature);
+		SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature));
 	/* Reports max temperature of all voltage rails */
 	gpu_metrics->temperature_vrsoc =
-		SMUQ10_ROUND(metrics->MaxVrTemperature);
+		SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature));
 
 	gpu_metrics->average_gfx_activity =
-		SMUQ10_ROUND(metrics->SocketGfxBusy);
+		SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy));
 	gpu_metrics->average_umc_activity =
-		SMUQ10_ROUND(metrics->DramBandwidthUtilization);
+		SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization));
 
 	gpu_metrics->curr_socket_power =
-		SMUQ10_ROUND(metrics->SocketPower);
+		SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower));
 	/* Energy counter reported in 15.259uJ (2^-16) units */
-	gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc;
+	gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc);
 
 	for (i = 0; i < MAX_GFX_CLKS; i++) {
 		xcc_id = GET_INST(GC, i);
 		if (xcc_id >= 0)
 			gpu_metrics->current_gfxclk[i] =
-				SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]);
 
 		if (i < MAX_CLKS) {
 			gpu_metrics->current_socclk[i] =
-				SMUQ10_ROUND(metrics->SocclkFrequency[i]);
+				SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[i]);
 			inst = GET_INST(VCN, i);
 			if (inst >= 0) {
 				gpu_metrics->current_vclk0[i] =
-					SMUQ10_ROUND(metrics->VclkFrequency[inst]);
+					SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[inst]);
 				gpu_metrics->current_dclk0[i] =
-					SMUQ10_ROUND(metrics->DclkFrequency[inst]);
+					SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[inst]);
 			}
 		}
 	}
 
-	gpu_metrics->current_uclk = SMUQ10_ROUND(metrics->UclkFrequency);
+	gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency));
 
 	/* Throttle status is not reported through metrics now */
 	gpu_metrics->throttle_status = 0;
 
 	/* Clock Lock Status. Each bit corresponds to each GFXCLK instance */
-	gpu_metrics->gfxclk_lock_status = metrics->GfxLockXCDMak >> GET_INST(GC, 0);
+	gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak) >> GET_INST(GC, 0);
 
 	if (!(adev->flags & AMD_IS_APU)) {
 		link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu);
@@ -2094,38 +2149,57 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
 		gpu_metrics->pcie_link_speed =
 			smu_v13_0_6_get_current_pcie_link_speed(smu);
 		gpu_metrics->pcie_bandwidth_acc =
-				SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]);
+				SMUQ10_ROUND(metrics_x->PcieBandwidthAcc[0]);
 		gpu_metrics->pcie_bandwidth_inst =
-				SMUQ10_ROUND(metrics->PcieBandwidth[0]);
+				SMUQ10_ROUND(metrics_x->PcieBandwidth[0]);
 		gpu_metrics->pcie_l0_to_recov_count_acc =
-				metrics->PCIeL0ToRecoveryCountAcc;
+				metrics_x->PCIeL0ToRecoveryCountAcc;
 		gpu_metrics->pcie_replay_count_acc =
-				metrics->PCIenReplayAAcc;
+				metrics_x->PCIenReplayAAcc;
 		gpu_metrics->pcie_replay_rover_count_acc =
-				metrics->PCIenReplayARolloverCountAcc;
+				metrics_x->PCIenReplayARolloverCountAcc;
+		gpu_metrics->pcie_nak_sent_count_acc =
+				metrics_x->PCIeNAKSentCountAcc;
+		gpu_metrics->pcie_nak_rcvd_count_acc =
+				metrics_x->PCIeNAKReceivedCountAcc;
 	}
 
 	gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
 
 	gpu_metrics->gfx_activity_acc =
-		SMUQ10_ROUND(metrics->SocketGfxBusyAcc);
+		SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc));
 	gpu_metrics->mem_activity_acc =
-		SMUQ10_ROUND(metrics->DramBandwidthUtilizationAcc);
+		SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc));
 
 	for (i = 0; i < NUM_XGMI_LINKS; i++) {
 		gpu_metrics->xgmi_read_data_acc[i] =
-			SMUQ10_ROUND(metrics->XgmiReadDataSizeAcc[i]);
+			SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc)[i]);
 		gpu_metrics->xgmi_write_data_acc[i] =
-			SMUQ10_ROUND(metrics->XgmiWriteDataSizeAcc[i]);
+			SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc)[i]);
 	}
 
-	gpu_metrics->xgmi_link_width = SMUQ10_ROUND(metrics->XgmiWidth);
-	gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(metrics->XgmiBitrate);
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		inst = GET_INST(JPEG, i);
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			gpu_metrics->jpeg_activity[(i * adev->jpeg.num_jpeg_rings) + j] =
+				SMUQ10_ROUND(GET_METRIC_FIELD(JpegBusy)
+				[(inst * adev->jpeg.num_jpeg_rings) + j]);
+		}
+	}
 
-	gpu_metrics->firmware_timestamp = metrics->Timestamp;
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		inst = GET_INST(VCN, i);
+		gpu_metrics->vcn_activity[i] =
+			SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy)[inst]);
+	}
+
+	gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth));
+	gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate));
+
+	gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp);
 
 	*table = (void *)gpu_metrics;
-	kfree(metrics);
+	kfree(metrics_x);
 
 	return sizeof(*gpu_metrics);
 }
@@ -2300,16 +2374,6 @@ static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu,
 	return ret;
 }
 
-static int smu_v13_0_6_post_init(struct smu_context *smu)
-{
-	struct amdgpu_device *adev = smu->adev;
-
-	if (!amdgpu_sriov_vf(adev) && adev->ras_enabled)
-		return smu_v13_0_6_mca_set_debug_mode(smu, false);
-
-	return 0;
-}
-
 static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
 {
 	struct smu_context *smu = adev->powerplay.pp_handle;
@@ -2392,8 +2456,8 @@ static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT
 
 static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info)
 {
-	uint64_t ipid = entry->regs[MCA_REG_IDX_IPID];
-	uint32_t insthi;
+	u64 ipid = entry->regs[MCA_REG_IDX_IPID];
+	u32 instidhi, instid;
 
 	/* NOTE: All MCA IPID register share the same format,
 	 * so the driver can share the MCMP1 register header file.
@@ -2402,9 +2466,15 @@ static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_
 	info->hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
 	info->mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType);
 
-	insthi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi);
-	info->aid = ((insthi >> 2) & 0x03);
-	info->socket_id = insthi & 0x03;
+	/*
+	 * Unfied DieID Format: SAASS. A:AID, S:Socket.
+	 * Unfied DieID[4] = InstanceId[0]
+	 * Unfied DieID[0:3] = InstanceIdHi[0:3]
+	 */
+	instidhi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi);
+	instid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo);
+	info->aid = ((instidhi >> 2) & 0x03);
+	info->socket_id = ((instid & 0x1) << 2) | (instidhi & 0x03);
 }
 
 static int mca_bank_read_reg(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
@@ -2483,9 +2553,9 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
 		return 0;
 	}
 
-	if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0))
+	if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(adev, status0))
 		*count = 1;
-	else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0))
+	else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(adev, status0))
 		*count = 1;
 
 	return 0;
@@ -2496,13 +2566,15 @@ static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, st
 					  uint32_t *count)
 {
 	u32 ext_error_code;
+	u32 err_cnt;
 
 	ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]);
+	err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]);
 
 	if (type == AMDGPU_MCA_ERROR_TYPE_UE && ext_error_code == 0)
-		*count = 1;
+		*count = err_cnt;
 	else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6)
-		*count = 1;
+		*count = err_cnt;
 
 	return 0;
 }
@@ -2578,6 +2650,7 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct
 	uint32_t instlo;
 
 	instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo);
+	instlo &= GENMASK(31, 1);
 	switch (instlo) {
 	case 0x36430400: /* SMNAID XCD 0 */
 	case 0x38430400: /* SMNAID XCD 1 */
@@ -2593,13 +2666,21 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct
 static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
 				  enum amdgpu_mca_error_type type, struct mca_bank_entry *entry)
 {
+	struct smu_context *smu = adev->powerplay.pp_handle;
 	uint32_t errcode, instlo;
 
 	instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo);
+	instlo &= GENMASK(31, 1);
 	if (instlo != 0x03b30400)
 		return false;
 
-	errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode);
+	if (!(adev->flags & AMD_IS_APU) && smu->smc_fw_version >= 0x00555600) {
+		errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]);
+		errcode &= 0xff;
+	} else {
+		errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode);
+	}
+
 	return mca_smu_check_error_code(adev, mca_ras, errcode);
 }
 
@@ -2812,6 +2893,13 @@ static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu,
 	return ret;
 }
 
+static ssize_t smu_v13_0_6_get_ecc_info(struct smu_context *smu,
+			void *table)
+{
+	/* Support ecc info by default */
+	return 0;
+}
+
 static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
 	/* init dpm */
 	.get_allowed_feature_mask = smu_v13_0_6_get_allowed_feature_mask,
@@ -2856,6 +2944,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
 	.log_thermal_throttling_event = smu_v13_0_6_log_thermal_throttling_event,
 	.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
 	.get_gpu_metrics = smu_v13_0_6_get_gpu_metrics,
+	.get_pm_metrics = smu_v13_0_6_get_pm_metrics,
 	.get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range,
 	.mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported,
 	.mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported,
@@ -2865,7 +2954,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
 	.i2c_init = smu_v13_0_6_i2c_control_init,
 	.i2c_fini = smu_v13_0_6_i2c_control_fini,
 	.send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num,
-	.post_init = smu_v13_0_6_post_init,
+	.get_ecc_info = smu_v13_0_6_get_ecc_info,
 };
 
 void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 81eafed76045..59606a19e3d2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -140,6 +140,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(AllowGpo,			PPSMC_MSG_SetGpoAllow,           0),
 	MSG_MAP(GetPptLimit,			PPSMC_MSG_GetPptLimit,                 0),
 	MSG_MAP(NotifyPowerSource,		PPSMC_MSG_NotifyPowerSource,           0),
+	MSG_MAP(EnableUCLKShadow,		PPSMC_MSG_EnableUCLKShadow,            0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = {
@@ -222,6 +223,7 @@ static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = {
 	TAB_MAP(ACTIVITY_MONITOR_COEFF),
 	[SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE},
 	TAB_MAP(OVERDRIVE),
+	TAB_MAP(WIFIBAND),
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {
@@ -512,6 +514,9 @@ static int smu_v13_0_7_tables_init(struct smu_context *smu)
 		       AMDGPU_GEM_DOMAIN_VRAM);
 	SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE,
 			PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+	SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND,
+		       sizeof(WifiBandEntryTable_t), PAGE_SIZE,
+		       AMDGPU_GEM_DOMAIN_VRAM);
 
 	smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL);
 	if (!smu_table->metrics_table)
@@ -2515,38 +2520,6 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
 	return ret;
 }
 
-static int smu_v13_0_7_baco_enter(struct smu_context *smu)
-{
-	struct smu_baco_context *smu_baco = &smu->smu_baco;
-	struct amdgpu_device *adev = smu->adev;
-
-	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev))
-		return smu_v13_0_baco_set_armd3_sequence(smu,
-				(smu_baco->maco_support && amdgpu_runtime_pm != 1) ?
-					BACO_SEQ_BAMACO : BACO_SEQ_BACO);
-	else
-		return smu_v13_0_baco_enter(smu);
-}
-
-static int smu_v13_0_7_baco_exit(struct smu_context *smu)
-{
-	struct amdgpu_device *adev = smu->adev;
-	int ret;
-
-	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
-		/* Wait for PMFW handling for the Dstate change */
-		usleep_range(10000, 11000);
-		ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
-	} else {
-		ret = smu_v13_0_baco_exit(smu);
-	}
-
-	if (!ret)
-		adev->gfx.is_poweron = false;
-
-	return ret;
-}
-
 static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -2567,6 +2540,11 @@ static int smu_v13_0_7_set_df_cstate(struct smu_context *smu,
 					       NULL);
 }
 
+static bool smu_v13_0_7_wbrf_support_check(struct smu_context *smu)
+{
+	return smu->smc_fw_version > 0x00524600;
+}
+
 static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
 	.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -2626,15 +2604,16 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
 	.set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
 	.baco_is_support = smu_v13_0_baco_is_support,
-	.baco_get_state = smu_v13_0_baco_get_state,
-	.baco_set_state = smu_v13_0_baco_set_state,
-	.baco_enter = smu_v13_0_7_baco_enter,
-	.baco_exit = smu_v13_0_7_baco_exit,
+	.baco_enter = smu_v13_0_baco_enter,
+	.baco_exit = smu_v13_0_baco_exit,
 	.mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported,
 	.mode1_reset = smu_v13_0_mode1_reset,
 	.set_mp1_state = smu_v13_0_7_set_mp1_state,
 	.set_df_cstate = smu_v13_0_7_set_df_cstate,
 	.gpo_control = smu_v13_0_gpo_control,
+	.is_asic_wbrf_supported = smu_v13_0_7_wbrf_support_check,
+	.enable_uclk_shadow = smu_v13_0_enable_uclk_shadow,
+	.set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges,
 };
 
 void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
index d8f8ad0e7137..4894f7ee737b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
@@ -224,7 +224,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
 	if (smu->is_apu)
 		adev->pm.fw_version = smu_version;
 
-	switch (adev->ip_versions[MP1_HWIP][0]) {
+	switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
 	case IP_VERSION(14, 0, 2):
 		smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_2;
 		break;
@@ -235,7 +235,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
 		break;
 	default:
 		dev_err(adev->dev, "smu unsupported IP version: 0x%x.\n",
-			adev->ip_versions[MP1_HWIP][0]);
+			amdgpu_ip_version(adev, MP1_HWIP, 0));
 		smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_INV;
 		break;
 	}
@@ -733,7 +733,7 @@ int smu_v14_0_gfx_off_control(struct smu_context *smu, bool enable)
 	int ret = 0;
 	struct amdgpu_device *adev = smu->adev;
 
-	switch (adev->ip_versions[MP1_HWIP][0]) {
+	switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
 	case IP_VERSION(14, 0, 2):
 	case IP_VERSION(14, 0, 0):
 		if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
index 03b38c3a9968..47fdbae4adfc 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
@@ -246,11 +246,20 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu,
 		*value = 0;
 		break;
 	case METRICS_AVERAGE_UCLK:
-		*value = 0;
+		*value = metrics->MemclkFrequency;
 		break;
 	case METRICS_AVERAGE_FCLK:
 		*value = metrics->FclkFrequency;
 		break;
+	case METRICS_AVERAGE_VPECLK:
+		*value = metrics->VpeclkFrequency;
+		break;
+	case METRICS_AVERAGE_IPUCLK:
+		*value = metrics->IpuclkFrequency;
+		break;
+	case METRICS_AVERAGE_MPIPUCLK:
+		*value = metrics->MpipuclkFrequency;
+		break;
 	case METRICS_AVERAGE_GFXACTIVITY:
 		*value = metrics->GfxActivity / 100;
 		break;
@@ -270,8 +279,26 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu,
 		*value = metrics->SocTemperature / 100 *
 		SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
 		break;
-	case METRICS_THROTTLER_STATUS:
-		*value = 0;
+	case METRICS_THROTTLER_RESIDENCY_PROCHOT:
+		*value = metrics->ThrottleResidency_PROCHOT;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_SPL:
+		*value = metrics->ThrottleResidency_SPL;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_FPPT:
+		*value = metrics->ThrottleResidency_FPPT;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_SPPT:
+		*value = metrics->ThrottleResidency_SPPT;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_THM_CORE:
+		*value = metrics->ThrottleResidency_THM_CORE;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_THM_GFX:
+		*value = metrics->ThrottleResidency_THM_GFX;
+		break;
+	case METRICS_THROTTLER_RESIDENCY_THM_SOC:
+		*value = metrics->ThrottleResidency_THM_SOC;
 		break;
 	case METRICS_VOLTAGE_VDDGFX:
 		*value = 0;
@@ -498,6 +525,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
 		sizeof(uint16_t) * 16);
 	gpu_metrics->average_dram_reads = metrics.DRAMReads;
 	gpu_metrics->average_dram_writes = metrics.DRAMWrites;
+	gpu_metrics->average_ipu_reads = metrics.IpuReads;
+	gpu_metrics->average_ipu_writes = metrics.IpuWrites;
 
 	gpu_metrics->average_socket_power = metrics.SocketPower;
 	gpu_metrics->average_ipu_power = metrics.IpuPower;
@@ -505,6 +534,7 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
 	gpu_metrics->average_gfx_power = metrics.GfxPower;
 	gpu_metrics->average_dgpu_power = metrics.dGpuPower;
 	gpu_metrics->average_all_core_power = metrics.AllCorePower;
+	gpu_metrics->average_sys_power = metrics.Psys;
 	memcpy(&gpu_metrics->average_core_power[0],
 		&metrics.CorePower[0],
 		sizeof(uint16_t) * 16);
@@ -515,6 +545,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
 	gpu_metrics->average_fclk_frequency = metrics.FclkFrequency;
 	gpu_metrics->average_vclk_frequency = metrics.VclkFrequency;
 	gpu_metrics->average_ipuclk_frequency = metrics.IpuclkFrequency;
+	gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency;
+	gpu_metrics->average_mpipu_frequency = metrics.MpipuclkFrequency;
 
 	memcpy(&gpu_metrics->current_coreclk[0],
 		&metrics.CoreFrequency[0],
@@ -522,6 +554,14 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu,
 	gpu_metrics->current_core_maxfreq = metrics.InfrastructureCpuMaxFreq;
 	gpu_metrics->current_gfx_maxfreq = metrics.InfrastructureGfxMaxFreq;
 
+	gpu_metrics->throttle_residency_prochot = metrics.ThrottleResidency_PROCHOT;
+	gpu_metrics->throttle_residency_spl = metrics.ThrottleResidency_SPL;
+	gpu_metrics->throttle_residency_fppt = metrics.ThrottleResidency_FPPT;
+	gpu_metrics->throttle_residency_sppt = metrics.ThrottleResidency_SPPT;
+	gpu_metrics->throttle_residency_thm_core = metrics.ThrottleResidency_THM_CORE;
+	gpu_metrics->throttle_residency_thm_gfx = metrics.ThrottleResidency_THM_GFX;
+	gpu_metrics->throttle_residency_thm_soc = metrics.ThrottleResidency_THM_SOC;
+
 	gpu_metrics->time_filter_alphavalue = metrics.FilterAlphaValue;
 	gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
 
@@ -1045,6 +1085,25 @@ static int smu_v14_0_0_set_umsch_mm_enable(struct smu_context *smu,
 					       0, NULL);
 }
 
+static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+	DpmClocks_t *clk_table = smu->smu_table.clocks_table;
+	uint8_t idx;
+
+	/* Only the Clock information of SOC and VPE is copied to provide VPE DPM settings for use. */
+	for (idx = 0; idx < NUM_SOCCLK_DPM_LEVELS; idx++) {
+		clock_table->SocClocks[idx].Freq = (idx < clk_table->NumSocClkLevelsEnabled) ? clk_table->SocClocks[idx]:0;
+		clock_table->SocClocks[idx].Vol = 0;
+	}
+
+	for (idx = 0; idx < NUM_VPE_DPM_LEVELS; idx++) {
+		clock_table->VPEClocks[idx].Freq = (idx < clk_table->VpeClkLevelsEnabled) ? clk_table->VPEClocks[idx]:0;
+		clock_table->VPEClocks[idx].Vol = 0;
+	}
+
+	return 0;
+}
+
 static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
 	.check_fw_status = smu_v14_0_check_fw_status,
 	.check_fw_version = smu_v14_0_check_fw_version,
@@ -1075,6 +1134,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
 	.set_gfx_power_up_by_imu = smu_v14_0_set_gfx_power_up_by_imu,
 	.dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable,
 	.dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable,
+	.get_dpm_clock_table = smu_14_0_0_get_dpm_table,
 };
 
 static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 001a5cf09657..00cd615bbcdc 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -989,6 +989,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev)
 	case METRICS_VERSION(1, 4):
 		structure_size = sizeof(struct gpu_metrics_v1_4);
 		break;
+	case METRICS_VERSION(1, 5):
+		structure_size = sizeof(struct gpu_metrics_v1_5);
+		break;
 	case METRICS_VERSION(2, 0):
 		structure_size = sizeof(struct gpu_metrics_v2_0);
 		break;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
index 80b3c3efc006..6f4d212607d7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
@@ -97,6 +97,10 @@
 #define smu_get_default_config_table_settings(smu, config_table)	smu_ppt_funcs(get_default_config_table_settings, -EOPNOTSUPP, smu, config_table)
 #define smu_set_config_table(smu, config_table)				smu_ppt_funcs(set_config_table, -EOPNOTSUPP, smu, config_table)
 #define smu_init_pptable_microcode(smu)					smu_ppt_funcs(init_pptable_microcode, 0, smu)
+#define smu_notify_rlc_state(smu, en)					smu_ppt_funcs(notify_rlc_state, 0, smu, en)
+#define smu_is_asic_wbrf_supported(smu)			smu_ppt_funcs(is_asic_wbrf_supported, false, smu)
+#define smu_enable_uclk_shadow(smu, enable)		smu_ppt_funcs(enable_uclk_shadow, 0, smu, enable)
+#define smu_set_wbrf_exclusion_ranges(smu, freq_band_range)		smu_ppt_funcs(set_wbrf_exclusion_ranges, -EOPNOTSUPP, smu, freq_band_range)
 
 #endif
 #endif
diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c
index dc01c43f6193..d72c22dcf685 100644
--- a/drivers/gpu/drm/arm/malidp_crtc.c
+++ b/drivers/gpu/drm/arm/malidp_crtc.c
@@ -221,7 +221,7 @@ static int malidp_crtc_atomic_check_ctm(struct drm_crtc *crtc,
 
 	/*
 	 * The size of the ctm is checked in
-	 * drm_atomic_replace_property_blob_from_id.
+	 * drm_property_replace_blob_from_id.
 	 */
 	ctm = (struct drm_color_ctm *)state->ctm->data;
 	for (i = 0; i < ARRAY_SIZE(ctm->matrix); ++i) {
diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index 19d2dc05c397..efd996f6c138 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -330,6 +330,7 @@ config DRM_TOSHIBA_TC358768
 	select REGMAP_I2C
 	select DRM_PANEL
 	select DRM_MIPI_DSI
+	select VIDEOMODE_HELPERS
 	help
 	  Toshiba TC358768AXBG/TC358778XBG DSI bridge chip driver.
 
diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c
index e48823a4f1ed..7f41525f7a6e 100644
--- a/drivers/gpu/drm/bridge/panel.c
+++ b/drivers/gpu/drm/bridge/panel.c
@@ -4,8 +4,6 @@
  * Copyright (C) 2017 Broadcom
  */
 
-#include <linux/device.h>
-
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge.h>
 #include <drm/drm_connector.h>
@@ -21,7 +19,6 @@ struct panel_bridge {
 	struct drm_bridge bridge;
 	struct drm_connector connector;
 	struct drm_panel *panel;
-	struct device_link *link;
 	u32 connector_type;
 };
 
@@ -63,24 +60,13 @@ static int panel_bridge_attach(struct drm_bridge *bridge,
 {
 	struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge);
 	struct drm_connector *connector = &panel_bridge->connector;
-	struct drm_panel *panel = panel_bridge->panel;
-	struct drm_device *drm_dev = bridge->dev;
 	int ret;
 
-	panel_bridge->link = device_link_add(drm_dev->dev, panel->dev,
-					     DL_FLAG_STATELESS);
-	if (!panel_bridge->link) {
-		DRM_ERROR("Failed to add device link between %s and %s\n",
-			  dev_name(drm_dev->dev), dev_name(panel->dev));
-		return -EINVAL;
-	}
-
 	if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR)
 		return 0;
 
 	if (!bridge->encoder) {
 		DRM_ERROR("Missing encoder\n");
-		device_link_del(panel_bridge->link);
 		return -ENODEV;
 	}
 
@@ -92,7 +78,6 @@ static int panel_bridge_attach(struct drm_bridge *bridge,
 				 panel_bridge->connector_type);
 	if (ret) {
 		DRM_ERROR("Failed to initialize connector\n");
-		device_link_del(panel_bridge->link);
 		return ret;
 	}
 
@@ -115,8 +100,6 @@ static void panel_bridge_detach(struct drm_bridge *bridge)
 	struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge);
 	struct drm_connector *connector = &panel_bridge->connector;
 
-	device_link_del(panel_bridge->link);
-
 	/*
 	 * Cleanup the connector if we know it was initialized.
 	 *
diff --git a/drivers/gpu/drm/ci/build.sh b/drivers/gpu/drm/ci/build.sh
index 97ff43759b91..f73f3471e94e 100644
--- a/drivers/gpu/drm/ci/build.sh
+++ b/drivers/gpu/drm/ci/build.sh
@@ -58,6 +58,9 @@ git config --global user.email "fdo@example.com"
 git config --global user.name "freedesktop.org CI"
 git config --global pull.rebase true
 
+# cleanup git state on the worker
+rm -rf .git/rebase-merge
+
 # Try to merge fixes from target repo
 if [ "$(git ls-remote --exit-code --heads ${UPSTREAM_REPO} ${TARGET_BRANCH}-external-fixes)" ]; then
     git pull ${UPSTREAM_REPO} ${TARGET_BRANCH}-external-fixes
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index c31fc0b48c31..a91737adf8e7 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -733,6 +733,7 @@ static void drm_atomic_plane_print_state(struct drm_printer *p,
 		   drm_get_color_encoding_name(state->color_encoding));
 	drm_printf(p, "\tcolor-range=%s\n",
 		   drm_get_color_range_name(state->color_range));
+	drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed);
 
 	if (plane->funcs->atomic_print_state)
 		plane->funcs->atomic_print_state(p, state);
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index c98a766ca3bd..39ef0a6addeb 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -2014,7 +2014,7 @@ int drm_atomic_helper_commit(struct drm_device *dev,
 			return ret;
 
 		drm_atomic_helper_async_commit(dev, state);
-		drm_atomic_helper_cleanup_planes(dev, state);
+		drm_atomic_helper_unprepare_planes(dev, state);
 
 		return 0;
 	}
@@ -2074,7 +2074,7 @@ int drm_atomic_helper_commit(struct drm_device *dev,
 	return 0;
 
 err:
-	drm_atomic_helper_cleanup_planes(dev, state);
+	drm_atomic_helper_unprepare_planes(dev, state);
 	return ret;
 }
 EXPORT_SYMBOL(drm_atomic_helper_commit);
@@ -2652,6 +2652,39 @@ fail_prepare_fb:
 }
 EXPORT_SYMBOL(drm_atomic_helper_prepare_planes);
 
+/**
+ * drm_atomic_helper_unprepare_planes - release plane resources on aborts
+ * @dev: DRM device
+ * @state: atomic state object with old state structures
+ *
+ * This function cleans up plane state, specifically framebuffers, from the
+ * atomic state. It undoes the effects of drm_atomic_helper_prepare_planes()
+ * when aborting an atomic commit. For cleaning up after a successful commit
+ * use drm_atomic_helper_cleanup_planes().
+ */
+void drm_atomic_helper_unprepare_planes(struct drm_device *dev,
+					struct drm_atomic_state *state)
+{
+	struct drm_plane *plane;
+	struct drm_plane_state *new_plane_state;
+	int i;
+
+	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+		const struct drm_plane_helper_funcs *funcs = plane->helper_private;
+
+		if (funcs->end_fb_access)
+			funcs->end_fb_access(plane, new_plane_state);
+	}
+
+	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+		const struct drm_plane_helper_funcs *funcs = plane->helper_private;
+
+		if (funcs->cleanup_fb)
+			funcs->cleanup_fb(plane, new_plane_state);
+	}
+}
+EXPORT_SYMBOL(drm_atomic_helper_unprepare_planes);
+
 static bool plane_crtc_active(const struct drm_plane_state *state)
 {
 	return state->crtc && state->crtc->state->active;
@@ -2786,6 +2819,17 @@ void drm_atomic_helper_commit_planes(struct drm_device *dev,
 
 		funcs->atomic_flush(crtc, old_state);
 	}
+
+	/*
+	 * Signal end of framebuffer access here before hw_done. After hw_done,
+	 * a later commit might have already released the plane state.
+	 */
+	for_each_old_plane_in_state(old_state, plane, old_plane_state, i) {
+		const struct drm_plane_helper_funcs *funcs = plane->helper_private;
+
+		if (funcs->end_fb_access)
+			funcs->end_fb_access(plane, old_plane_state);
+	}
 }
 EXPORT_SYMBOL(drm_atomic_helper_commit_planes);
 
@@ -2913,40 +2957,22 @@ EXPORT_SYMBOL(drm_atomic_helper_disable_planes_on_crtc);
  * configuration. Hence the old configuration must be perserved in @old_state to
  * be able to call this function.
  *
- * This function must also be called on the new state when the atomic update
- * fails at any point after calling drm_atomic_helper_prepare_planes().
+ * This function may not be called on the new state when the atomic update
+ * fails at any point after calling drm_atomic_helper_prepare_planes(). Use
+ * drm_atomic_helper_unprepare_planes() in this case.
  */
 void drm_atomic_helper_cleanup_planes(struct drm_device *dev,
 				      struct drm_atomic_state *old_state)
 {
 	struct drm_plane *plane;
-	struct drm_plane_state *old_plane_state, *new_plane_state;
+	struct drm_plane_state *old_plane_state;
 	int i;
 
-	for_each_oldnew_plane_in_state(old_state, plane, old_plane_state, new_plane_state, i) {
+	for_each_old_plane_in_state(old_state, plane, old_plane_state, i) {
 		const struct drm_plane_helper_funcs *funcs = plane->helper_private;
 
-		if (funcs->end_fb_access)
-			funcs->end_fb_access(plane, new_plane_state);
-	}
-
-	for_each_oldnew_plane_in_state(old_state, plane, old_plane_state, new_plane_state, i) {
-		const struct drm_plane_helper_funcs *funcs;
-		struct drm_plane_state *plane_state;
-
-		/*
-		 * This might be called before swapping when commit is aborted,
-		 * in which case we have to cleanup the new state.
-		 */
-		if (old_plane_state == plane->state)
-			plane_state = new_plane_state;
-		else
-			plane_state = old_plane_state;
-
-		funcs = plane->helper_private;
-
 		if (funcs->cleanup_fb)
-			funcs->cleanup_fb(plane, plane_state);
+			funcs->cleanup_fb(plane, old_plane_state);
 	}
 }
 EXPORT_SYMBOL(drm_atomic_helper_cleanup_planes);
diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
index 54975de44a0e..519228eb1095 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -352,6 +352,7 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane,
 	state->fence = NULL;
 	state->commit = NULL;
 	state->fb_damage_clips = NULL;
+	state->color_mgmt_changed = false;
 }
 EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state);
 
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index aee4a65d4959..29d4940188d4 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -362,48 +362,6 @@ static s32 __user *get_out_fence_for_connector(struct drm_atomic_state *state,
 	return fence_ptr;
 }
 
-static int
-drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
-					 struct drm_property_blob **blob,
-					 uint64_t blob_id,
-					 ssize_t expected_size,
-					 ssize_t expected_elem_size,
-					 bool *replaced)
-{
-	struct drm_property_blob *new_blob = NULL;
-
-	if (blob_id != 0) {
-		new_blob = drm_property_lookup_blob(dev, blob_id);
-		if (new_blob == NULL) {
-			drm_dbg_atomic(dev,
-				       "cannot find blob ID %llu\n", blob_id);
-			return -EINVAL;
-		}
-
-		if (expected_size > 0 &&
-		    new_blob->length != expected_size) {
-			drm_dbg_atomic(dev,
-				       "[BLOB:%d] length %zu different from expected %zu\n",
-				       new_blob->base.id, new_blob->length, expected_size);
-			drm_property_blob_put(new_blob);
-			return -EINVAL;
-		}
-		if (expected_elem_size > 0 &&
-		    new_blob->length % expected_elem_size != 0) {
-			drm_dbg_atomic(dev,
-				       "[BLOB:%d] length %zu not divisible by element size %zu\n",
-				       new_blob->base.id, new_blob->length, expected_elem_size);
-			drm_property_blob_put(new_blob);
-			return -EINVAL;
-		}
-	}
-
-	*replaced |= drm_property_replace_blob(blob, new_blob);
-	drm_property_blob_put(new_blob);
-
-	return 0;
-}
-
 static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 		struct drm_crtc_state *state, struct drm_property *property,
 		uint64_t val)
@@ -424,7 +382,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 	} else if (property == config->prop_vrr_enabled) {
 		state->vrr_enabled = val;
 	} else if (property == config->degamma_lut_property) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 					&state->degamma_lut,
 					val,
 					-1, sizeof(struct drm_color_lut),
@@ -432,7 +390,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 		state->color_mgmt_changed |= replaced;
 		return ret;
 	} else if (property == config->ctm_property) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 					&state->ctm,
 					val,
 					sizeof(struct drm_color_ctm), -1,
@@ -440,7 +398,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 		state->color_mgmt_changed |= replaced;
 		return ret;
 	} else if (property == config->gamma_lut_property) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 					&state->gamma_lut,
 					val,
 					-1, sizeof(struct drm_color_lut),
@@ -581,7 +539,7 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane,
 	} else if (property == plane->color_range_property) {
 		state->color_range = val;
 	} else if (property == config->prop_fb_damage_clips) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 					&state->fb_damage_clips,
 					val,
 					-1,
@@ -778,7 +736,7 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector,
 		if (state->link_status != DRM_LINK_STATUS_GOOD)
 			state->link_status = val;
 	} else if (property == config->hdr_output_metadata_property) {
-		ret = drm_atomic_replace_property_blob_from_id(dev,
+		ret = drm_property_replace_blob_from_id(dev,
 				&state->hdr_output_metadata,
 				val,
 				sizeof(struct hdr_output_metadata), -1,
diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
index 5d2809de4517..48ee851b61d9 100644
--- a/drivers/gpu/drm/drm_exec.c
+++ b/drivers/gpu/drm/drm_exec.c
@@ -69,16 +69,23 @@ static void drm_exec_unlock_all(struct drm_exec *exec)
  * drm_exec_init - initialize a drm_exec object
  * @exec: the drm_exec object to initialize
  * @flags: controls locking behavior, see DRM_EXEC_* defines
+ * @nr: the initial # of objects
  *
  * Initialize the object and make sure that we can track locked objects.
+ *
+ * If nr is non-zero then it is used as the initial objects table size.
+ * In either case, the table will grow (be re-allocated) on demand.
  */
-void drm_exec_init(struct drm_exec *exec, uint32_t flags)
+void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr)
 {
+	if (!nr)
+		nr = PAGE_SIZE / sizeof(void *);
+
 	exec->flags = flags;
-	exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	exec->objects = kvmalloc_array(nr, sizeof(void *), GFP_KERNEL);
 
 	/* If allocation here fails, just delay that till the first use */
-	exec->max_objects = exec->objects ? PAGE_SIZE / sizeof(void *) : 0;
+	exec->max_objects = exec->objects ? nr : 0;
 	exec->num_objects = 0;
 	exec->contended = DRM_EXEC_DUMMY;
 	exec->prelocked = NULL;
diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index 9c0922c1a5a2..f9eb56f24bef 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
 /*
  * Copyright (c) 2022 Red Hat.
  *
@@ -1250,7 +1250,7 @@ drm_gpuvm_exec_lock(struct drm_gpuvm_exec *vm_exec)
 	unsigned int num_fences = vm_exec->num_fences;
 	int ret;
 
-	drm_exec_init(exec, vm_exec->flags);
+	drm_exec_init(exec, vm_exec->flags, 0);
 
 	drm_exec_until_all_locked(exec) {
 		ret = drm_gpuvm_prepare_vm(gpuvm, exec, num_fences);
@@ -1341,7 +1341,7 @@ drm_gpuvm_exec_lock_range(struct drm_gpuvm_exec *vm_exec,
 	struct drm_exec *exec = &vm_exec->exec;
 	int ret;
 
-	drm_exec_init(exec, vm_exec->flags);
+	drm_exec_init(exec, vm_exec->flags, 0);
 
 	drm_exec_until_all_locked(exec) {
 		ret = drm_gpuvm_prepare_range(gpuvm, exec, addr, range,
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 63b709a67471..834a5e28abbe 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -278,7 +278,7 @@ void drm_gem_dmabuf_release(struct dma_buf *dma_buf)
 }
 EXPORT_SYMBOL(drm_gem_dmabuf_release);
 
-/*
+/**
  * drm_gem_prime_fd_to_handle - PRIME import function for GEM drivers
  * @dev: drm_device to import into
  * @file_priv: drm file-private structure
@@ -292,9 +292,9 @@ EXPORT_SYMBOL(drm_gem_dmabuf_release);
  *
  * Returns 0 on success or a negative error code on failure.
  */
-static int drm_gem_prime_fd_to_handle(struct drm_device *dev,
-				      struct drm_file *file_priv, int prime_fd,
-				      uint32_t *handle)
+int drm_gem_prime_fd_to_handle(struct drm_device *dev,
+			       struct drm_file *file_priv, int prime_fd,
+			       uint32_t *handle)
 {
 	struct dma_buf *dma_buf;
 	struct drm_gem_object *obj;
@@ -360,6 +360,7 @@ out_put:
 	dma_buf_put(dma_buf);
 	return ret;
 }
+EXPORT_SYMBOL(drm_gem_prime_fd_to_handle);
 
 int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 				 struct drm_file *file_priv)
@@ -408,7 +409,7 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev,
 	return dmabuf;
 }
 
-/*
+/**
  * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers
  * @dev: dev to export the buffer from
  * @file_priv: drm file-private structure
@@ -421,10 +422,10 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev,
  * The actual exporting from GEM object to a dma-buf is done through the
  * &drm_gem_object_funcs.export callback.
  */
-static int drm_gem_prime_handle_to_fd(struct drm_device *dev,
-				      struct drm_file *file_priv, uint32_t handle,
-				      uint32_t flags,
-				      int *prime_fd)
+int drm_gem_prime_handle_to_fd(struct drm_device *dev,
+			       struct drm_file *file_priv, uint32_t handle,
+			       uint32_t flags,
+			       int *prime_fd)
 {
 	struct drm_gem_object *obj;
 	int ret = 0;
@@ -506,6 +507,7 @@ out_unlock:
 
 	return ret;
 }
+EXPORT_SYMBOL(drm_gem_prime_handle_to_fd);
 
 int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data,
 				 struct drm_file *file_priv)
@@ -864,9 +866,9 @@ EXPORT_SYMBOL(drm_prime_get_contiguous_size);
  * @obj: GEM object to export
  * @flags: flags like DRM_CLOEXEC and DRM_RDWR
  *
- * This is the implementation of the &drm_gem_object_funcs.export functions
- * for GEM drivers using the PRIME helpers. It is used as the default for
- * drivers that do not set their own.
+ * This is the implementation of the &drm_gem_object_funcs.export functions for GEM drivers
+ * using the PRIME helpers. It is used as the default in
+ * drm_gem_prime_handle_to_fd().
  */
 struct dma_buf *drm_gem_prime_export(struct drm_gem_object *obj,
 				     int flags)
@@ -962,9 +964,10 @@ EXPORT_SYMBOL(drm_gem_prime_import_dev);
  * @dev: drm_device to import into
  * @dma_buf: dma-buf object to import
  *
- * This is the implementation of the gem_prime_import functions for GEM
- * drivers using the PRIME helpers. It is the default for drivers that do
- * not set their own &drm_driver.gem_prime_import.
+ * This is the implementation of the gem_prime_import functions for GEM drivers
+ * using the PRIME helpers. Drivers can use this as their
+ * &drm_driver.gem_prime_import implementation. It is used as the default
+ * implementation in drm_gem_prime_fd_to_handle().
  *
  * Drivers must arrange to call drm_prime_gem_destroy() from their
  * &drm_gem_object_funcs.free hook when using this function.
diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
index dfec479830e4..596272149a35 100644
--- a/drivers/gpu/drm/drm_property.c
+++ b/drivers/gpu/drm/drm_property.c
@@ -27,6 +27,7 @@
 #include <drm/drm_drv.h>
 #include <drm/drm_file.h>
 #include <drm/drm_framebuffer.h>
+#include <drm/drm_print.h>
 #include <drm/drm_property.h>
 
 #include "drm_crtc_internal.h"
@@ -751,6 +752,64 @@ bool drm_property_replace_blob(struct drm_property_blob **blob,
 }
 EXPORT_SYMBOL(drm_property_replace_blob);
 
+/**
+ * drm_property_replace_blob_from_id - replace a blob property taking a reference
+ * @dev: DRM device
+ * @blob: a pointer to the member blob to be replaced
+ * @blob_id: the id of the new blob to replace with
+ * @expected_size: expected size of the blob property
+ * @expected_elem_size: expected size of an element in the blob property
+ * @replaced: if the blob was in fact replaced
+ *
+ * Look up the new blob from id, take its reference, check expected sizes of
+ * the blob and its element and replace the old blob by the new one. Advertise
+ * if the replacement operation was successful.
+ *
+ * Return: true if the blob was in fact replaced. -EINVAL if the new blob was
+ * not found or sizes don't match.
+ */
+int drm_property_replace_blob_from_id(struct drm_device *dev,
+					 struct drm_property_blob **blob,
+					 uint64_t blob_id,
+					 ssize_t expected_size,
+					 ssize_t expected_elem_size,
+					 bool *replaced)
+{
+	struct drm_property_blob *new_blob = NULL;
+
+	if (blob_id != 0) {
+		new_blob = drm_property_lookup_blob(dev, blob_id);
+		if (new_blob == NULL) {
+			drm_dbg_atomic(dev,
+				       "cannot find blob ID %llu\n", blob_id);
+			return -EINVAL;
+		}
+
+		if (expected_size > 0 &&
+		    new_blob->length != expected_size) {
+			drm_dbg_atomic(dev,
+				       "[BLOB:%d] length %zu different from expected %zu\n",
+				       new_blob->base.id, new_blob->length, expected_size);
+			drm_property_blob_put(new_blob);
+			return -EINVAL;
+		}
+		if (expected_elem_size > 0 &&
+		    new_blob->length % expected_elem_size != 0) {
+			drm_dbg_atomic(dev,
+				       "[BLOB:%d] length %zu not divisible by element size %zu\n",
+				       new_blob->base.id, new_blob->length, expected_elem_size);
+			drm_property_blob_put(new_blob);
+			return -EINVAL;
+		}
+	}
+
+	*replaced |= drm_property_replace_blob(blob, new_blob);
+	drm_property_blob_put(new_blob);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_property_replace_blob_from_id);
+
 int drm_mode_getblob_ioctl(struct drm_device *dev,
 			   void *data, struct drm_file *file_priv)
 {
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 4d986077738b..776f2f0b602d 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -862,18 +862,16 @@ err_disable_pm_runtime:
 	return ret;
 }
 
-static int exynos5433_decon_remove(struct platform_device *pdev)
+static void exynos5433_decon_remove(struct platform_device *pdev)
 {
 	pm_runtime_disable(&pdev->dev);
 
 	component_del(&pdev->dev, &decon_component_ops);
-
-	return 0;
 }
 
 struct platform_driver exynos5433_decon_driver = {
 	.probe		= exynos5433_decon_probe,
-	.remove		= exynos5433_decon_remove,
+	.remove_new	= exynos5433_decon_remove,
 	.driver		= {
 		.name	= "exynos5433-decon",
 		.pm	= pm_ptr(&exynos5433_decon_pm_ops),
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index 0156a5e94435..0d185c0564b9 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -765,7 +765,7 @@ err_iounmap:
 	return ret;
 }
 
-static int decon_remove(struct platform_device *pdev)
+static void decon_remove(struct platform_device *pdev)
 {
 	struct decon_context *ctx = dev_get_drvdata(&pdev->dev);
 
@@ -774,8 +774,6 @@ static int decon_remove(struct platform_device *pdev)
 	iounmap(ctx->regs);
 
 	component_del(&pdev->dev, &decon_component_ops);
-
-	return 0;
 }
 
 static int exynos7_decon_suspend(struct device *dev)
@@ -840,7 +838,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(exynos7_decon_pm_ops, exynos7_decon_suspend,
 
 struct platform_driver decon_driver = {
 	.probe		= decon_probe,
-	.remove		= decon_remove,
+	.remove_new	= decon_remove,
 	.driver		= {
 		.name	= "exynos-decon",
 		.pm	= pm_ptr(&exynos7_decon_pm_ops),
diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c
index 3404ec1367fb..ca31bad6c576 100644
--- a/drivers/gpu/drm/exynos/exynos_dp.c
+++ b/drivers/gpu/drm/exynos/exynos_dp.c
@@ -250,14 +250,12 @@ out:
 	return component_add(&pdev->dev, &exynos_dp_ops);
 }
 
-static int exynos_dp_remove(struct platform_device *pdev)
+static void exynos_dp_remove(struct platform_device *pdev)
 {
 	struct exynos_dp_device *dp = platform_get_drvdata(pdev);
 
 	component_del(&pdev->dev, &exynos_dp_ops);
 	analogix_dp_remove(dp->adp);
-
-	return 0;
 }
 
 static int exynos_dp_suspend(struct device *dev)
@@ -285,7 +283,7 @@ MODULE_DEVICE_TABLE(of, exynos_dp_match);
 
 struct platform_driver dp_driver = {
 	.probe		= exynos_dp_probe,
-	.remove		= exynos_dp_remove,
+	.remove_new	= exynos_dp_remove,
 	.driver		= {
 		.name	= "exynos-dp",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c
index a971590b8132..e2c7373f20c6 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dma.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c
@@ -107,18 +107,16 @@ int exynos_drm_register_dma(struct drm_device *drm, struct device *dev,
 		return 0;
 
 	if (!priv->mapping) {
-		void *mapping;
+		void *mapping = NULL;
 
 		if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))
 			mapping = arm_iommu_create_mapping(&platform_bus_type,
 				EXYNOS_DEV_ADDR_START, EXYNOS_DEV_ADDR_SIZE);
 		else if (IS_ENABLED(CONFIG_IOMMU_DMA))
 			mapping = iommu_get_domain_for_dev(priv->dma_dev);
-		else
-			mapping = ERR_PTR(-ENODEV);
 
-		if (IS_ERR(mapping))
-			return PTR_ERR(mapping);
+		if (!mapping)
+			return -ENODEV;
 		priv->mapping = mapping;
 	}
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
index 378e5381978f..0dc36df6ada3 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
@@ -101,7 +101,7 @@ static int exynos_dpi_create_connector(struct drm_encoder *encoder)
 
 	ret = drm_connector_init(encoder->dev, connector,
 				 &exynos_dpi_connector_funcs,
-				 DRM_MODE_CONNECTOR_VGA);
+				 DRM_MODE_CONNECTOR_DPI);
 	if (ret) {
 		DRM_DEV_ERROR(ctx->dev,
 			      "failed to initialize connector with drm\n");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 8399256cb5c9..7c59e1164a48 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -300,6 +300,7 @@ err_mode_config_cleanup:
 	drm_mode_config_cleanup(drm);
 	exynos_drm_cleanup_dma(drm);
 	kfree(private);
+	dev_set_drvdata(dev, NULL);
 err_free_drm:
 	drm_dev_put(drm);
 
@@ -313,6 +314,7 @@ static void exynos_drm_unbind(struct device *dev)
 	drm_dev_unregister(drm);
 
 	drm_kms_helper_poll_fini(drm);
+	drm_atomic_helper_shutdown(drm);
 
 	component_unbind_all(drm->dev, drm);
 	drm_mode_config_cleanup(drm);
@@ -344,15 +346,23 @@ static int exynos_drm_platform_probe(struct platform_device *pdev)
 					       match);
 }
 
-static int exynos_drm_platform_remove(struct platform_device *pdev)
+static void exynos_drm_platform_remove(struct platform_device *pdev)
 {
 	component_master_del(&pdev->dev, &exynos_drm_ops);
-	return 0;
+}
+
+static void exynos_drm_platform_shutdown(struct platform_device *pdev)
+{
+	struct drm_device *drm = platform_get_drvdata(pdev);
+
+	if (drm)
+		drm_atomic_helper_shutdown(drm);
 }
 
 static struct platform_driver exynos_drm_platform_driver = {
 	.probe	= exynos_drm_platform_probe,
-	.remove	= exynos_drm_platform_remove,
+	.remove_new	= exynos_drm_platform_remove,
+	.shutdown = exynos_drm_platform_shutdown,
 	.driver	= {
 		.name	= "exynos-drm",
 		.pm	= &exynos_drm_pm_ops,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 8de2714599fc..e81a576de398 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -1367,7 +1367,7 @@ err_pm_dis:
 	return ret;
 }
 
-static int fimc_remove(struct platform_device *pdev)
+static void fimc_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct fimc_context *ctx = get_fimc_context(dev);
@@ -1377,8 +1377,6 @@ static int fimc_remove(struct platform_device *pdev)
 	pm_runtime_disable(dev);
 
 	fimc_put_clocks(ctx);
-
-	return 0;
 }
 
 static int fimc_runtime_suspend(struct device *dev)
@@ -1410,7 +1408,7 @@ MODULE_DEVICE_TABLE(of, fimc_of_match);
 
 struct platform_driver fimc_driver = {
 	.probe		= fimc_probe,
-	.remove		= fimc_remove,
+	.remove_new	= fimc_remove,
 	.driver		= {
 		.of_match_table = fimc_of_match,
 		.name	= "exynos-drm-fimc",
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 8dde7b1e9b35..a9f1c5c05894 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -1277,13 +1277,11 @@ err_disable_pm_runtime:
 	return ret;
 }
 
-static int fimd_remove(struct platform_device *pdev)
+static void fimd_remove(struct platform_device *pdev)
 {
 	pm_runtime_disable(&pdev->dev);
 
 	component_del(&pdev->dev, &fimd_component_ops);
-
-	return 0;
 }
 
 static int exynos_fimd_suspend(struct device *dev)
@@ -1325,7 +1323,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(exynos_fimd_pm_ops, exynos_fimd_suspend,
 
 struct platform_driver fimd_driver = {
 	.probe		= fimd_probe,
-	.remove		= fimd_remove,
+	.remove_new	= fimd_remove,
 	.driver		= {
 		.name	= "exynos4-fb",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 414e585ec7dd..f3138423612e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -1530,7 +1530,7 @@ err_destroy_slab:
 	return ret;
 }
 
-static int g2d_remove(struct platform_device *pdev)
+static void g2d_remove(struct platform_device *pdev)
 {
 	struct g2d_data *g2d = platform_get_drvdata(pdev);
 
@@ -1545,8 +1545,6 @@ static int g2d_remove(struct platform_device *pdev)
 	g2d_fini_cmdlist(g2d);
 	destroy_workqueue(g2d->g2d_workq);
 	kmem_cache_destroy(g2d->runqueue_slab);
-
-	return 0;
 }
 
 static int g2d_suspend(struct device *dev)
@@ -1609,7 +1607,7 @@ MODULE_DEVICE_TABLE(of, exynos_g2d_match);
 
 struct platform_driver g2d_driver = {
 	.probe		= g2d_probe,
-	.remove		= g2d_remove,
+	.remove_new	= g2d_remove,
 	.driver		= {
 		.name	= "exynos-drm-g2d",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 35771fb4e85d..e9a769590415 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1309,15 +1309,13 @@ err_pm_dis:
 	return ret;
 }
 
-static int gsc_remove(struct platform_device *pdev)
+static void gsc_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 
 	component_del(dev, &gsc_component_ops);
 	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
-
-	return 0;
 }
 
 static int __maybe_unused gsc_runtime_suspend(struct device *dev)
@@ -1422,7 +1420,7 @@ MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match);
 
 struct platform_driver gsc_driver = {
 	.probe		= gsc_probe,
-	.remove		= gsc_remove,
+	.remove_new	= gsc_remove,
 	.driver		= {
 		.name	= "exynos-drm-gsc",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c
index 17bab5b1663f..e2920960180f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_mic.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c
@@ -442,7 +442,7 @@ err:
 	return ret;
 }
 
-static int exynos_mic_remove(struct platform_device *pdev)
+static void exynos_mic_remove(struct platform_device *pdev)
 {
 	struct exynos_mic *mic = platform_get_drvdata(pdev);
 
@@ -450,8 +450,6 @@ static int exynos_mic_remove(struct platform_device *pdev)
 	pm_runtime_disable(&pdev->dev);
 
 	drm_bridge_remove(&mic->bridge);
-
-	return 0;
 }
 
 static const struct of_device_id exynos_mic_of_match[] = {
@@ -462,7 +460,7 @@ MODULE_DEVICE_TABLE(of, exynos_mic_of_match);
 
 struct platform_driver mic_driver = {
 	.probe		= exynos_mic_probe,
-	.remove		= exynos_mic_remove,
+	.remove_new	= exynos_mic_remove,
 	.driver		= {
 		.name	= "exynos-mic",
 		.pm	= pm_ptr(&exynos_mic_pm_ops),
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index ffb327c5139e..5f7516655b08 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -329,15 +329,13 @@ err_component:
 	return ret;
 }
 
-static int rotator_remove(struct platform_device *pdev)
+static void rotator_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 
 	component_del(dev, &rotator_component_ops);
 	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
-
-	return 0;
 }
 
 static int rotator_runtime_suspend(struct device *dev)
@@ -453,7 +451,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(rotator_pm_ops, rotator_runtime_suspend,
 
 struct platform_driver rotator_driver = {
 	.probe		= rotator_probe,
-	.remove		= rotator_remove,
+	.remove_new	= rotator_remove,
 	.driver		= {
 		.name	= "exynos-rotator",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
index f2b8b09a6b4e..392f721f13ab 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
@@ -539,15 +539,13 @@ err_ippdrv_register:
 	return ret;
 }
 
-static int scaler_remove(struct platform_device *pdev)
+static void scaler_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 
 	component_del(dev, &scaler_component_ops);
 	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
-
-	return 0;
 }
 
 static int clk_disable_unprepare_wrapper(struct clk *clk)
@@ -721,7 +719,7 @@ MODULE_DEVICE_TABLE(of, exynos_scaler_match);
 
 struct platform_driver scaler_driver = {
 	.probe		= scaler_probe,
-	.remove		= scaler_remove,
+	.remove_new	= scaler_remove,
 	.driver		= {
 		.name	= "exynos-scaler",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index f5e1adfcaa51..00382f28748a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -462,7 +462,7 @@ static int vidi_probe(struct platform_device *pdev)
 	return component_add(dev, &vidi_component_ops);
 }
 
-static int vidi_remove(struct platform_device *pdev)
+static void vidi_remove(struct platform_device *pdev)
 {
 	struct vidi_context *ctx = platform_get_drvdata(pdev);
 
@@ -472,13 +472,11 @@ static int vidi_remove(struct platform_device *pdev)
 	}
 
 	component_del(&pdev->dev, &vidi_component_ops);
-
-	return 0;
 }
 
 struct platform_driver vidi_driver = {
 	.probe		= vidi_probe,
-	.remove		= vidi_remove,
+	.remove_new	= vidi_remove,
 	.driver		= {
 		.name	= "exynos-drm-vidi",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index f3aaa4ea3e68..43bed6cbaaea 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -1861,6 +1861,8 @@ static int hdmi_bind(struct device *dev, struct device *master, void *data)
 		return ret;
 
 	crtc = exynos_drm_crtc_get_by_type(drm_dev, EXYNOS_DISPLAY_TYPE_HDMI);
+	if (IS_ERR(crtc))
+		return PTR_ERR(crtc);
 	crtc->pipe_clk = &hdata->phy_clk;
 
 	ret = hdmi_create_connector(encoder);
@@ -2067,7 +2069,7 @@ err_ddc:
 	return ret;
 }
 
-static int hdmi_remove(struct platform_device *pdev)
+static void hdmi_remove(struct platform_device *pdev)
 {
 	struct hdmi_context *hdata = platform_get_drvdata(pdev);
 
@@ -2090,8 +2092,6 @@ static int hdmi_remove(struct platform_device *pdev)
 	put_device(&hdata->ddc_adpt->dev);
 
 	mutex_destroy(&hdata->mutex);
-
-	return 0;
 }
 
 static int __maybe_unused exynos_hdmi_suspend(struct device *dev)
@@ -2123,7 +2123,7 @@ static const struct dev_pm_ops exynos_hdmi_pm_ops = {
 
 struct platform_driver hdmi_driver = {
 	.probe		= hdmi_probe,
-	.remove		= hdmi_remove,
+	.remove_new	= hdmi_remove,
 	.driver		= {
 		.name	= "exynos-hdmi",
 		.owner	= THIS_MODULE,
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index b302392ff0d7..6822333fd0e6 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -1258,13 +1258,11 @@ static int mixer_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int mixer_remove(struct platform_device *pdev)
+static void mixer_remove(struct platform_device *pdev)
 {
 	pm_runtime_disable(&pdev->dev);
 
 	component_del(&pdev->dev, &mixer_component_ops);
-
-	return 0;
 }
 
 static int __maybe_unused exynos_mixer_suspend(struct device *dev)
@@ -1338,5 +1336,5 @@ struct platform_driver mixer_driver = {
 		.of_match_table = mixer_match_types,
 	},
 	.probe = mixer_probe,
-	.remove = mixer_remove,
+	.remove_new = mixer_remove,
 };
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 2d21930d5501..5b7162076850 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -24,7 +24,9 @@ config DRM_I915_DEBUG
 	select DEBUG_FS
 	select PREEMPT_COUNT
 	select I2C_CHARDEV
+	select REF_TRACKER
 	select STACKDEPOT
+	select STACKTRACE
 	select DRM_DP_AUX_CHARDEV
 	select X86_MSR # used by igt/pm_rpm
 	select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
@@ -38,6 +40,7 @@ config DRM_I915_DEBUG
 	select DRM_I915_DEBUG_GEM_ONCE
 	select DRM_I915_DEBUG_MMIO
 	select DRM_I915_DEBUG_RUNTIME_PM
+	select DRM_I915_DEBUG_WAKEREF
 	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
 	select DRM_I915_SELFTEST
 	default n
@@ -231,7 +234,9 @@ config DRM_I915_DEBUG_RUNTIME_PM
 	bool "Enable extra state checking for runtime PM"
 	depends on DRM_I915
 	default n
+	select REF_TRACKER
 	select STACKDEPOT
+	select STACKTRACE
 	help
 	  Choose this option to turn on extra state checking for the
 	  runtime PM functionality. This may introduce overhead during
@@ -240,3 +245,16 @@ config DRM_I915_DEBUG_RUNTIME_PM
 	  Recommended for driver developers only.
 
 	  If in doubt, say "N"
+
+config DRM_I915_DEBUG_WAKEREF
+	bool "Enable extra tracking for wakerefs"
+	depends on DRM_I915
+	select REF_TRACKER
+	select STACKDEPOT
+	select STACKTRACE
+	help
+	  Choose this option to turn on extra state checking and usage
+	  tracking for the wakerefPM functionality. This may introduce
+	  overhead during driver runtime.
+
+	  If in doubt, say "N"
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 65e984242089..e777686190ca 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -280,6 +280,7 @@ i915-y += \
 	display/intel_dsb.o \
 	display/intel_dsb_buffer.o \
 	display/intel_fb.o \
+	display/intel_fb_bo.o \
 	display/intel_fb_pin.o \
 	display/intel_fbc.o \
 	display/intel_fdi.o \
@@ -318,7 +319,8 @@ i915-$(CONFIG_ACPI) += \
 	display/intel_acpi.o \
 	display/intel_opregion.o
 i915-$(CONFIG_DRM_FBDEV_EMULATION) += \
-	display/intel_fbdev.o
+	display/intel_fbdev.o \
+	display/intel_fbdev_fb.o
 i915-$(CONFIG_DEBUG_FS) += \
 	display/intel_display_debugfs.o \
 	display/intel_display_debugfs_params.o \
diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c
index b37c0d02d500..11ca9572e8b3 100644
--- a/drivers/gpu/drm/i915/display/i9xx_wm.c
+++ b/drivers/gpu/drm/i915/display/i9xx_wm.c
@@ -608,7 +608,7 @@ static bool intel_crtc_active(struct intel_crtc *crtc)
 	 * crtc->state->active once we have proper CRTC states wired up
 	 * for atomic.
 	 */
-	return crtc && crtc->active && crtc->base.primary->state->fb &&
+	return crtc->active && crtc->base.primary->state->fb &&
 		crtc->config->hw.adjusted_mode.crtc_clock;
 }
 
@@ -2477,7 +2477,7 @@ static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv,
 		 * FIFO size is only half of the self
 		 * refresh FIFO size on ILK/SNB.
 		 */
-		if (DISPLAY_VER(dev_priv) <= 6)
+		if (DISPLAY_VER(dev_priv) < 7)
 			fifo_size /= 2;
 	}
 
@@ -2818,7 +2818,7 @@ static int ilk_compute_pipe_wm(struct intel_atomic_state *state,
 	usable_level = dev_priv->display.wm.num_levels - 1;
 
 	/* ILK/SNB: LP2+ watermarks only w/o sprites */
-	if (DISPLAY_VER(dev_priv) <= 6 && pipe_wm->sprites_enabled)
+	if (DISPLAY_VER(dev_priv) < 7 && pipe_wm->sprites_enabled)
 		usable_level = 1;
 
 	/* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
@@ -2961,7 +2961,7 @@ static void ilk_wm_merge(struct drm_i915_private *dev_priv,
 	int last_enabled_level = num_levels - 1;
 
 	/* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
-	if ((DISPLAY_VER(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
+	if ((DISPLAY_VER(dev_priv) < 7 || IS_IVYBRIDGE(dev_priv)) &&
 	    config->num_pipes_active > 1)
 		last_enabled_level = 0;
 
@@ -3060,7 +3060,7 @@ static void ilk_compute_wm_results(struct drm_i915_private *dev_priv,
 		 * Always set WM_LP_SPRITE_EN when spr_val != 0, even if the
 		 * level is disabled. Doing otherwise could cause underruns.
 		 */
-		if (DISPLAY_VER(dev_priv) <= 6 && r->spr_val) {
+		if (DISPLAY_VER(dev_priv) < 7 && r->spr_val) {
 			drm_WARN_ON(&dev_priv->drm, wm_lp != 1);
 			results->wm_lp_spr[wm_lp - 1] |= WM_LP_SPRITE_ENABLE;
 		}
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index 481fcb650850..ac456a2275db 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1440,6 +1440,13 @@ static void gen11_dsi_post_disable(struct intel_atomic_state *state,
 static enum drm_mode_status gen11_dsi_mode_valid(struct drm_connector *connector,
 						 struct drm_display_mode *mode)
 {
+	struct drm_i915_private *i915 = to_i915(connector->dev);
+	enum drm_mode_status status;
+
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
+
 	/* FIXME: DSC? */
 	return intel_dsi_mode_valid(connector, mode);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 2fd72b2fd109..aa169b0055e9 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -2201,6 +2201,9 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin)
 	const u8 *ddc_pin_map;
 	int i, n_entries;
 
+	if (IS_DGFX(i915))
+		return vbt_pin;
+
 	if (INTEL_PCH_TYPE(i915) >= PCH_LNL || HAS_PCH_MTP(i915) ||
 	    IS_ALDERLAKE_P(i915)) {
 		ddc_pin_map = adlp_ddc_pin_map;
@@ -2208,8 +2211,6 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin)
 	} else if (IS_ALDERLAKE_S(i915)) {
 		ddc_pin_map = adls_ddc_pin_map;
 		n_entries = ARRAY_SIZE(adls_ddc_pin_map);
-	} else if (INTEL_PCH_TYPE(i915) >= PCH_DG1) {
-		return vbt_pin;
 	} else if (IS_ROCKETLAKE(i915) && INTEL_PCH_TYPE(i915) == PCH_TGP) {
 		ddc_pin_map = rkl_pch_tgp_ddc_pin_map;
 		n_entries = ARRAY_SIZE(rkl_pch_tgp_ddc_pin_map);
@@ -3474,8 +3475,7 @@ bool intel_bios_get_dsc_params(struct intel_encoder *encoder,
 			if (!devdata->dsc)
 				return false;
 
-			if (crtc_state)
-				fill_dsc(crtc_state, devdata->dsc, dsc_max_bpc);
+			fill_dsc(crtc_state, devdata->dsc, dsc_max_bpc);
 
 			return true;
 		}
diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
index bef96db62c80..7f2a50b4f494 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -87,7 +87,8 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
 		return ret;
 
 	dclk = val & 0xffff;
-	sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > 11 ? 500 : 0), 1000);
+	sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) >= 12 ? 500 : 0),
+				1000);
 	sp->t_rp = (val & 0xff0000) >> 16;
 	sp->t_rcd = (val & 0xff000000) >> 24;
 
@@ -480,7 +481,7 @@ static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel
 	if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12)
 		qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1);
 
-	if (DISPLAY_VER(dev_priv) > 11 && num_channels > qi.max_numchannels)
+	if (DISPLAY_VER(dev_priv) >= 12 && num_channels > qi.max_numchannels)
 		drm_warn(&dev_priv->drm, "Number of channels exceeds max number of channels.");
 	if (qi.max_numchannels != 0)
 		num_channels = min_t(u8, num_channels, qi.max_numchannels);
@@ -897,7 +898,7 @@ static int icl_find_qgv_points(struct drm_i915_private *i915,
 		unsigned int idx;
 		unsigned int max_data_rate;
 
-		if (DISPLAY_VER(i915) > 11)
+		if (DISPLAY_VER(i915) >= 12)
 			idx = tgl_max_bw_index(i915, num_active_planes, i);
 		else
 			idx = icl_max_bw_index(i915, num_active_planes, i);
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index b93d1ad7936d..c985ebb6831a 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1180,7 +1180,7 @@ sanitize:
 	/* force cdclk programming */
 	dev_priv->display.cdclk.hw.cdclk = 0;
 	/* force full PLL disable + enable */
-	dev_priv->display.cdclk.hw.vco = -1;
+	dev_priv->display.cdclk.hw.vco = ~0;
 }
 
 static void skl_cdclk_init_hw(struct drm_i915_private *dev_priv)
@@ -1446,50 +1446,77 @@ static u8 bxt_calc_voltage_level(int cdclk)
 	return DIV_ROUND_UP(cdclk, 25000);
 }
 
+static u8 calc_voltage_level(int cdclk, int num_voltage_levels,
+			     const int voltage_level_max_cdclk[])
+{
+	int voltage_level;
+
+	for (voltage_level = 0; voltage_level < num_voltage_levels; voltage_level++) {
+		if (cdclk <= voltage_level_max_cdclk[voltage_level])
+			return voltage_level;
+	}
+
+	MISSING_CASE(cdclk);
+	return num_voltage_levels - 1;
+}
+
 static u8 icl_calc_voltage_level(int cdclk)
 {
-	if (cdclk > 556800)
-		return 2;
-	else if (cdclk > 312000)
-		return 1;
-	else
-		return 0;
+	static const int icl_voltage_level_max_cdclk[] = {
+		[0] = 312000,
+		[1] = 556800,
+		[2] = 652800,
+	};
+
+	return calc_voltage_level(cdclk,
+				  ARRAY_SIZE(icl_voltage_level_max_cdclk),
+				  icl_voltage_level_max_cdclk);
 }
 
 static u8 ehl_calc_voltage_level(int cdclk)
 {
-	if (cdclk > 326400)
-		return 3;
-	else if (cdclk > 312000)
-		return 2;
-	else if (cdclk > 180000)
-		return 1;
-	else
-		return 0;
+	static const int ehl_voltage_level_max_cdclk[] = {
+		[0] = 180000,
+		[1] = 312000,
+		[2] = 326400,
+		/*
+		 * Bspec lists the limit as 556.8 MHz, but some JSL
+		 * development boards (at least) boot with 652.8 MHz
+		 */
+		[3] = 652800,
+	};
+
+	return calc_voltage_level(cdclk,
+				  ARRAY_SIZE(ehl_voltage_level_max_cdclk),
+				  ehl_voltage_level_max_cdclk);
 }
 
 static u8 tgl_calc_voltage_level(int cdclk)
 {
-	if (cdclk > 556800)
-		return 3;
-	else if (cdclk > 326400)
-		return 2;
-	else if (cdclk > 312000)
-		return 1;
-	else
-		return 0;
+	static const int tgl_voltage_level_max_cdclk[] = {
+		[0] = 312000,
+		[1] = 326400,
+		[2] = 556800,
+		[3] = 652800,
+	};
+
+	return calc_voltage_level(cdclk,
+				  ARRAY_SIZE(tgl_voltage_level_max_cdclk),
+				  tgl_voltage_level_max_cdclk);
 }
 
 static u8 rplu_calc_voltage_level(int cdclk)
 {
-	if (cdclk > 556800)
-		return 3;
-	else if (cdclk > 480000)
-		return 2;
-	else if (cdclk > 312000)
-		return 1;
-	else
-		return 0;
+	static const int rplu_voltage_level_max_cdclk[] = {
+		[0] = 312000,
+		[1] = 480000,
+		[2] = 556800,
+		[3] = 652800,
+	};
+
+	return calc_voltage_level(cdclk,
+				  ARRAY_SIZE(rplu_voltage_level_max_cdclk),
+				  rplu_voltage_level_max_cdclk);
 }
 
 static void icl_readout_refclk(struct drm_i915_private *dev_priv,
@@ -1800,6 +1827,8 @@ static bool cdclk_pll_is_unknown(unsigned int vco)
 	return vco == ~0;
 }
 
+static const int cdclk_squash_len = 16;
+
 static int cdclk_squash_divider(u16 waveform)
 {
 	return hweight16(waveform ?: 0xffff);
@@ -1811,7 +1840,6 @@ static bool cdclk_compute_crawl_and_squash_midpoint(struct drm_i915_private *i91
 						    struct intel_cdclk_config *mid_cdclk_config)
 {
 	u16 old_waveform, new_waveform, mid_waveform;
-	int size = 16;
 	int div = 2;
 
 	/* Return if PLL is in an unknown state, force a complete disable and re-enable. */
@@ -1850,7 +1878,8 @@ static bool cdclk_compute_crawl_and_squash_midpoint(struct drm_i915_private *i91
 	}
 
 	mid_cdclk_config->cdclk = DIV_ROUND_CLOSEST(cdclk_squash_divider(mid_waveform) *
-						    mid_cdclk_config->vco, size * div);
+						    mid_cdclk_config->vco,
+						    cdclk_squash_len * div);
 
 	/* make sure the mid clock came out sane */
 
@@ -1878,9 +1907,9 @@ static void _bxt_set_cdclk(struct drm_i915_private *dev_priv,
 {
 	int cdclk = cdclk_config->cdclk;
 	int vco = cdclk_config->vco;
-	u32 val;
+	int unsquashed_cdclk;
 	u16 waveform;
-	int clock;
+	u32 val;
 
 	if (HAS_CDCLK_CRAWL(dev_priv) && dev_priv->display.cdclk.hw.vco > 0 && vco > 0 &&
 	    !cdclk_pll_is_unknown(dev_priv->display.cdclk.hw.vco)) {
@@ -1897,15 +1926,13 @@ static void _bxt_set_cdclk(struct drm_i915_private *dev_priv,
 
 	waveform = cdclk_squash_waveform(dev_priv, cdclk);
 
-	if (waveform)
-		clock = vco / 2;
-	else
-		clock = cdclk;
+	unsquashed_cdclk = DIV_ROUND_CLOSEST(cdclk * cdclk_squash_len,
+					     cdclk_squash_divider(waveform));
 
 	if (HAS_CDCLK_SQUASH(dev_priv))
 		dg2_cdclk_squash_program(dev_priv, waveform);
 
-	val = bxt_cdclk_cd2x_div_sel(dev_priv, clock, vco) |
+	val = bxt_cdclk_cd2x_div_sel(dev_priv, unsquashed_cdclk, vco) |
 		bxt_cdclk_cd2x_pipe(dev_priv, pipe);
 
 	/*
@@ -2075,7 +2102,7 @@ sanitize:
 	dev_priv->display.cdclk.hw.cdclk = 0;
 
 	/* force full PLL disable + enable */
-	dev_priv->display.cdclk.hw.vco = -1;
+	dev_priv->display.cdclk.hw.vco = ~0;
 }
 
 static void bxt_cdclk_init_hw(struct drm_i915_private *dev_priv)
@@ -2597,7 +2624,7 @@ static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state)
 		 * Since PPC = 2 with bigjoiner
 		 * => CDCLK >= compressed_bpp * Pixel clock  / 2 * Bigjoiner Interface bits
 		 */
-		int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24;
+		int bigjoiner_interface_bits = DISPLAY_VER(i915) >= 14 ? 36 : 24;
 		int min_cdclk_bj =
 			(to_bpp_int_roundup(crtc_state->dsc.compressed_bpp_x16) *
 			 pixel_clock) / (2 * bigjoiner_interface_bits);
@@ -3489,7 +3516,7 @@ static const struct intel_cdclk_funcs mtl_cdclk_funcs = {
 	.get_cdclk = bxt_get_cdclk,
 	.set_cdclk = bxt_set_cdclk,
 	.modeset_calc_cdclk = bxt_modeset_calc_cdclk,
-	.calc_voltage_level = tgl_calc_voltage_level,
+	.calc_voltage_level = rplu_calc_voltage_level,
 };
 
 static const struct intel_cdclk_funcs rplu_cdclk_funcs = {
diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c
index 0e33a0523a75..abaacea5c2cc 100644
--- a/drivers/gpu/drm/i915/display/intel_crt.c
+++ b/drivers/gpu/drm/i915/display/intel_crt.c
@@ -348,8 +348,13 @@ intel_crt_mode_valid(struct drm_connector *connector,
 	struct drm_device *dev = connector->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	int max_dotclk = dev_priv->max_dotclk_freq;
+	enum drm_mode_status status;
 	int max_clock;
 
+	status = intel_cpu_transcoder_mode_valid(dev_priv, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
 
diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c
index 1fd068e6e26c..8a84a31c7b48 100644
--- a/drivers/gpu/drm/i915/display/intel_crtc.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc.c
@@ -553,8 +553,15 @@ void intel_pipe_update_start(struct intel_atomic_state *state,
 
 	intel_psr_lock(new_crtc_state);
 
-	if (new_crtc_state->do_async_flip)
+	if (new_crtc_state->do_async_flip) {
+		spin_lock_irq(&crtc->base.dev->event_lock);
+		/* arm the event for the flip done irq handler */
+		crtc->flip_done_event = new_crtc_state->uapi.event;
+		spin_unlock_irq(&crtc->base.dev->event_lock);
+
+		new_crtc_state->uapi.event = NULL;
 		return;
+	}
 
 	if (intel_crtc_needs_vblank_work(new_crtc_state))
 		intel_crtc_vblank_work_init(new_crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
index 2d15e82c0b3d..49fd100ec98a 100644
--- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c
@@ -262,6 +262,15 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config,
 		drm_dbg_kms(&i915->drm, "fec: %s, enhanced framing: %s\n",
 			    str_enabled_disabled(pipe_config->fec_enable),
 			    str_enabled_disabled(pipe_config->enhanced_framing));
+
+		drm_dbg_kms(&i915->drm, "sdp split: %s\n",
+			    str_enabled_disabled(pipe_config->sdp_split_enable));
+
+		drm_dbg_kms(&i915->drm, "psr: %s, psr2: %s, panel replay: %s, selective fetch: %s\n",
+			    str_enabled_disabled(pipe_config->has_psr),
+			    str_enabled_disabled(pipe_config->has_psr2),
+			    str_enabled_disabled(pipe_config->has_panel_replay),
+			    str_enabled_disabled(pipe_config->enable_psr2_sel_fetch));
 	}
 
 	drm_dbg_kms(&i915->drm, "framestart delay: %d, MSA timing delay: %d\n",
diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c
index b342fad180ca..926e2de00eb5 100644
--- a/drivers/gpu/drm/i915/display/intel_cursor.c
+++ b/drivers/gpu/drm/i915/display/intel_cursor.c
@@ -21,8 +21,11 @@
 #include "intel_fb_pin.h"
 #include "intel_frontbuffer.h"
 #include "intel_psr.h"
+#include "intel_psr_regs.h"
 #include "skl_watermark.h"
 
+#include "gem/i915_gem_object.h"
+
 /* Cursor formats */
 static const u32 intel_cursor_formats[] = {
 	DRM_FORMAT_ARGB8888,
@@ -33,11 +36,11 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
 	struct drm_i915_private *dev_priv =
 		to_i915(plane_state->uapi.plane->dev);
 	const struct drm_framebuffer *fb = plane_state->hw.fb;
-	const struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	u32 base;
 
 	if (DISPLAY_INFO(dev_priv)->cursor_needs_physical)
-		base = sg_dma_address(obj->mm.pages->sgl);
+		base = i915_gem_object_get_dma_address(obj, 0);
 	else
 		base = intel_plane_ggtt_offset(plane_state);
 
@@ -484,6 +487,35 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state,
 	return 0;
 }
 
+static void i9xx_cursor_disable_sel_fetch_arm(struct intel_plane *plane,
+					      const struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0);
+}
+
+static void i9xx_cursor_update_sel_fetch_arm(struct intel_plane *plane,
+					     const struct intel_crtc_state *crtc_state,
+					     const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	if (drm_rect_height(&plane_state->psr2_sel_fetch_area) > 0)
+		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
+				  plane_state->ctl);
+	else
+		i9xx_cursor_disable_sel_fetch_arm(plane, crtc_state);
+}
+
 /* TODO: split into noarm+arm pair */
 static void i9xx_cursor_update_arm(struct intel_plane *plane,
 				   const struct intel_crtc_state *crtc_state,
@@ -531,10 +563,10 @@ static void i9xx_cursor_update_arm(struct intel_plane *plane,
 		skl_write_cursor_wm(plane, crtc_state);
 
 	if (plane_state)
-		intel_psr2_program_plane_sel_fetch_arm(plane, crtc_state,
-						       plane_state);
+		i9xx_cursor_update_sel_fetch_arm(plane, crtc_state,
+						 plane_state);
 	else
-		intel_psr2_disable_plane_sel_fetch_arm(plane, crtc_state);
+		i9xx_cursor_disable_sel_fetch_arm(plane, crtc_state);
 
 	if (plane->cursor.base != base ||
 	    plane->cursor.size != fbc_ctl ||
diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
index a8fa76580802..6b25e195232f 100644
--- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c
@@ -415,9 +415,15 @@ void intel_cx0_phy_set_signal_levels(struct intel_encoder *encoder,
 	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
 	const struct intel_ddi_buf_trans *trans;
 	enum phy phy = intel_port_to_phy(i915, encoder->port);
-	u8 owned_lane_mask = intel_cx0_get_owned_lane_mask(i915, encoder);
+	u8 owned_lane_mask;
 	intel_wakeref_t wakeref;
 	int n_entries, ln;
+	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
+
+	if (intel_tc_port_in_tbt_alt_mode(dig_port))
+		return;
+
+	owned_lane_mask = intel_cx0_get_owned_lane_mask(i915, encoder);
 
 	wakeref = intel_cx0_phy_transaction_begin(encoder);
 
@@ -739,7 +745,6 @@ static const struct intel_c10pll_state * const mtl_c10_edp_tables[] = {
 
 /* C20 basic DP 1.4 tables */
 static const struct intel_c20pll_state mtl_c20_dp_rbr = {
-	.link_bit_rate = 162000,
 	.clock = 162000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x5800, /* tx cfg1 */
@@ -765,7 +770,6 @@ static const struct intel_c20pll_state mtl_c20_dp_rbr = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_hbr1 = {
-	.link_bit_rate = 270000,
 	.clock = 270000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
@@ -791,7 +795,6 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr1 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_hbr2 = {
-	.link_bit_rate = 540000,
 	.clock = 540000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
@@ -817,7 +820,6 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr2 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_hbr3 = {
-	.link_bit_rate = 810000,
 	.clock = 810000,
 	.tx = {	0xbe88, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
@@ -844,8 +846,7 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr3 = {
 
 /* C20 basic DP 2.0 tables */
 static const struct intel_c20pll_state mtl_c20_dp_uhbr10 = {
-	.link_bit_rate = 1000000, /* 10 Gbps */
-	.clock = 312500,
+	.clock = 1000000, /* 10 Gbps */
 	.tx = {	0xbe21, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -869,8 +870,7 @@ static const struct intel_c20pll_state mtl_c20_dp_uhbr10 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_uhbr13_5 = {
-	.link_bit_rate = 1350000, /* 13.5 Gbps */
-	.clock = 421875,
+	.clock = 1350000, /* 13.5 Gbps */
 	.tx = {	0xbea0, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -895,8 +895,7 @@ static const struct intel_c20pll_state mtl_c20_dp_uhbr13_5 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_dp_uhbr20 = {
-	.link_bit_rate = 2000000, /* 20 Gbps */
-	.clock = 625000,
+	.clock = 2000000, /* 20 Gbps */
 	.tx = {	0xbe20, /* tx cfg0 */
 		0x4800, /* tx cfg1 */
 		0x0000, /* tx cfg2 */
@@ -1515,7 +1514,6 @@ static const struct intel_c10pll_state * const mtl_c10_hdmi_tables[] = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_25_175 = {
-	.link_bit_rate = 25175,
 	.clock = 25175,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
@@ -1541,7 +1539,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_25_175 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = {
-	.link_bit_rate = 27000,
 	.clock = 27000,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
@@ -1567,7 +1564,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_74_25 = {
-	.link_bit_rate = 74250,
 	.clock = 74250,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
@@ -1593,7 +1589,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_74_25 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = {
-	.link_bit_rate = 148500,
 	.clock = 148500,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
@@ -1619,7 +1614,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_594 = {
-	.link_bit_rate = 594000,
 	.clock = 594000,
 	.tx = {  0xbe88, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
@@ -1645,8 +1639,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_594 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_300 = {
-	.link_bit_rate = 3000000,
-	.clock = 166670,
+	.clock = 3000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1671,8 +1664,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_300 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_600 = {
-	.link_bit_rate = 6000000,
-	.clock = 333330,
+	.clock = 6000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1697,8 +1689,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_600 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_800 = {
-	.link_bit_rate = 8000000,
-	.clock = 444440,
+	.clock = 8000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1723,8 +1714,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_800 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_1000 = {
-	.link_bit_rate = 10000000,
-	.clock = 555560,
+	.clock = 10000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1749,8 +1739,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_1000 = {
 };
 
 static const struct intel_c20pll_state mtl_c20_hdmi_1200 = {
-	.link_bit_rate = 12000000,
-	.clock = 666670,
+	.clock = 12000000,
 	.tx = {  0xbe98, /* tx cfg0 */
 		  0x9800, /* tx cfg1 */
 		  0x0000, /* tx cfg2 */
@@ -1999,7 +1988,6 @@ static int intel_c20_compute_hdmi_tmds_pll(u64 pixel_clock, struct intel_c20pll_
 	else
 		mpllb_ana_freq_vco = MPLLB_ANA_FREQ_VCO_0;
 
-	pll_state->link_bit_rate	= pixel_clock;
 	pll_state->clock	= pixel_clock;
 	pll_state->tx[0]	= 0xbe88;
 	pll_state->tx[1]	= 0x9800;
@@ -2036,7 +2024,7 @@ static int intel_c20_phy_check_hdmi_link_rate(int clock)
 	int i;
 
 	for (i = 0; tables[i]; i++) {
-		if (clock == tables[i]->link_bit_rate)
+		if (clock == tables[i]->clock)
 			return MODE_OK;
 	}
 
@@ -2088,7 +2076,7 @@ static int intel_c20pll_calc_state(struct intel_crtc_state *crtc_state,
 		return -EINVAL;
 
 	for (i = 0; tables[i]; i++) {
-		if (crtc_state->port_clock == tables[i]->link_bit_rate) {
+		if (crtc_state->port_clock == tables[i]->clock) {
 			crtc_state->cx0pll_state.c20 = *tables[i];
 			return 0;
 		}
@@ -2111,7 +2099,7 @@ int intel_cx0pll_calc_state(struct intel_crtc_state *crtc_state,
 static bool intel_c20_use_mplla(u32 clock)
 {
 	/* 10G and 20G rates use MPLLA */
-	if (clock == 312500 || clock == 625000)
+	if (clock == 1000000 || clock == 2000000)
 		return true;
 
 	return false;
@@ -2214,11 +2202,11 @@ static u8 intel_c20_get_dp_rate(u32 clock)
 		return 6;
 	case 432000: /* 4.32 Gbps eDP */
 		return 7;
-	case 312500: /* 10 Gbps DP2.0 */
+	case 1000000: /* 10 Gbps DP2.0 */
 		return 8;
-	case 421875: /* 13.5 Gbps DP2.0 */
+	case 1350000: /* 13.5 Gbps DP2.0 */
 		return 9;
-	case 625000: /* 20 Gbps DP2.0*/
+	case 2000000: /* 20 Gbps DP2.0 */
 		return 10;
 	case 648000: /* 6.48 Gbps eDP*/
 		return 11;
@@ -2236,13 +2224,13 @@ static u8 intel_c20_get_hdmi_rate(u32 clock)
 		return 0;
 
 	switch (clock) {
-	case 166670: /* 3 Gbps */
-	case 333330: /* 6 Gbps */
-	case 666670: /* 12 Gbps */
+	case 300000: /* 3 Gbps */
+	case 600000: /* 6 Gbps */
+	case 1200000: /* 12 Gbps */
 		return 1;
-	case 444440: /* 8 Gbps */
+	case 800000: /* 8 Gbps */
 		return 2;
-	case 555560: /* 10 Gbps */
+	case 1000000: /* 10 Gbps */
 		return 3;
 	default:
 		MISSING_CASE(clock);
@@ -2253,7 +2241,7 @@ static u8 intel_c20_get_hdmi_rate(u32 clock)
 static bool is_dp2(u32 clock)
 {
 	/* DP2.0 clock rates */
-	if (clock == 312500 || clock == 421875 || clock  == 625000)
+	if (clock == 1000000 || clock == 1350000 || clock  == 2000000)
 		return true;
 
 	return false;
@@ -2262,11 +2250,11 @@ static bool is_dp2(u32 clock)
 static bool is_hdmi_frl(u32 clock)
 {
 	switch (clock) {
-	case 166670: /* 3 Gbps */
-	case 333330: /* 6 Gbps */
-	case 444440: /* 8 Gbps */
-	case 555560: /* 10 Gbps */
-	case 666670: /* 12 Gbps */
+	case 300000: /* 3 Gbps */
+	case 600000: /* 6 Gbps */
+	case 800000: /* 8 Gbps */
+	case 1000000: /* 10 Gbps */
+	case 1200000: /* 12 Gbps */
 		return true;
 	default:
 		return false;
@@ -2299,6 +2287,7 @@ static void intel_c20_pll_program(struct drm_i915_private *i915,
 	const struct intel_c20pll_state *pll_state = &crtc_state->cx0pll_state.c20;
 	bool dp = false;
 	int lane = crtc_state->lane_count > 2 ? INTEL_CX0_BOTH_LANES : INTEL_CX0_LANE0;
+	u32 clock = crtc_state->port_clock;
 	bool cntx;
 	int i;
 
@@ -2337,7 +2326,7 @@ static void intel_c20_pll_program(struct drm_i915_private *i915,
 	}
 
 	/* 3.3 mpllb or mplla configuration */
-	if (intel_c20_use_mplla(pll_state->clock)) {
+	if (intel_c20_use_mplla(clock)) {
 		for (i = 0; i < ARRAY_SIZE(pll_state->mplla); i++) {
 			if (cntx)
 				intel_c20_sram_write(i915, encoder->port, INTEL_CX0_LANE0,
@@ -2364,23 +2353,23 @@ static void intel_c20_pll_program(struct drm_i915_private *i915,
 	/* 4. Program custom width to match the link protocol */
 	intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_WIDTH,
 		      PHY_C20_CUSTOM_WIDTH_MASK,
-		      PHY_C20_CUSTOM_WIDTH(intel_get_c20_custom_width(pll_state->clock, dp)),
+		      PHY_C20_CUSTOM_WIDTH(intel_get_c20_custom_width(clock, dp)),
 		      MB_WRITE_COMMITTED);
 
 	/* 5. For DP or 6. For HDMI */
 	if (dp) {
 		intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_SERDES_RATE,
 			      BIT(6) | PHY_C20_CUSTOM_SERDES_MASK,
-			      BIT(6) | PHY_C20_CUSTOM_SERDES(intel_c20_get_dp_rate(pll_state->clock)),
+			      BIT(6) | PHY_C20_CUSTOM_SERDES(intel_c20_get_dp_rate(clock)),
 			      MB_WRITE_COMMITTED);
 	} else {
 		intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_SERDES_RATE,
 			      BIT(7) | PHY_C20_CUSTOM_SERDES_MASK,
-			      is_hdmi_frl(pll_state->clock) ? BIT(7) : 0,
+			      is_hdmi_frl(clock) ? BIT(7) : 0,
 			      MB_WRITE_COMMITTED);
 
 		intel_cx0_write(i915, encoder->port, INTEL_CX0_BOTH_LANES, PHY_C20_VDR_HDMI_RATE,
-				intel_c20_get_hdmi_rate(pll_state->clock),
+				intel_c20_get_hdmi_rate(clock),
 				MB_WRITE_COMMITTED);
 	}
 
@@ -2479,7 +2468,8 @@ static void intel_program_port_clock_ctl(struct intel_encoder *encoder,
 
 	val |= XELPDP_FORWARD_CLOCK_UNGATE;
 
-	if (is_hdmi_frl(crtc_state->port_clock))
+	if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) &&
+	    is_hdmi_frl(crtc_state->port_clock))
 		val |= XELPDP_DDI_CLOCK_SELECT(XELPDP_DDI_CLOCK_SELECT_DIV18CLK);
 	else
 		val |= XELPDP_DDI_CLOCK_SELECT(XELPDP_DDI_CLOCK_SELECT_MAXPCLK);
@@ -3077,24 +3067,29 @@ static void intel_c20pll_state_verify(const struct intel_crtc_state *state,
 {
 	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	const struct intel_c20pll_state *mpll_sw_state = &state->cx0pll_state.c20;
-	bool use_mplla;
+	bool sw_use_mpllb = mpll_sw_state->tx[0] & C20_PHY_USE_MPLLB;
+	bool hw_use_mpllb = mpll_hw_state->tx[0] & C20_PHY_USE_MPLLB;
 	int i;
 
-	use_mplla = intel_c20_use_mplla(mpll_hw_state->clock);
-	if (use_mplla) {
-		for (i = 0; i < ARRAY_SIZE(mpll_sw_state->mplla); i++) {
-			I915_STATE_WARN(i915, mpll_hw_state->mplla[i] != mpll_sw_state->mplla[i],
-					"[CRTC:%d:%s] mismatch in C20MPLLA: Register[%d] (expected 0x%04x, found 0x%04x)",
-					crtc->base.base.id, crtc->base.name, i,
-					mpll_sw_state->mplla[i], mpll_hw_state->mplla[i]);
-		}
-	} else {
+	I915_STATE_WARN(i915, sw_use_mpllb != hw_use_mpllb,
+			"[CRTC:%d:%s] mismatch in C20: Register MPLLB selection (expected %d, found %d)",
+			crtc->base.base.id, crtc->base.name,
+			sw_use_mpllb, hw_use_mpllb);
+
+	if (hw_use_mpllb) {
 		for (i = 0; i < ARRAY_SIZE(mpll_sw_state->mpllb); i++) {
 			I915_STATE_WARN(i915, mpll_hw_state->mpllb[i] != mpll_sw_state->mpllb[i],
 					"[CRTC:%d:%s] mismatch in C20MPLLB: Register[%d] (expected 0x%04x, found 0x%04x)",
 					crtc->base.base.id, crtc->base.name, i,
 					mpll_sw_state->mpllb[i], mpll_hw_state->mpllb[i]);
 		}
+	} else {
+		for (i = 0; i < ARRAY_SIZE(mpll_sw_state->mplla); i++) {
+			I915_STATE_WARN(i915, mpll_hw_state->mplla[i] != mpll_sw_state->mplla[i],
+					"[CRTC:%d:%s] mismatch in C20MPLLA: Register[%d] (expected 0x%04x, found 0x%04x)",
+					crtc->base.base.id, crtc->base.name, i,
+					mpll_sw_state->mplla[i], mpll_hw_state->mplla[i]);
+		}
 	}
 
 	for (i = 0; i < ARRAY_SIZE(mpll_sw_state->tx); i++) {
@@ -3136,6 +3131,9 @@ void intel_cx0pll_state_verify(struct intel_atomic_state *state,
 	encoder = intel_get_crtc_new_encoder(state, new_crtc_state);
 	phy = intel_port_to_phy(i915, encoder->port);
 
+	if (intel_tc_port_in_tbt_alt_mode(enc_to_dig_port(encoder)))
+		return;
+
 	intel_cx0pll_readout_hw_state(encoder, &mpll_hw_state);
 
 	if (intel_is_c10phy(i915, phy))
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 38f28c480b38..12a29363e5df 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3672,16 +3672,42 @@ static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
 		AUDIO_OUTPUT_ENABLE(cpu_transcoder);
 }
 
-void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv,
-					 struct intel_crtc_state *crtc_state)
+static int tgl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state)
 {
-	if (DISPLAY_VER(dev_priv) >= 12 && crtc_state->port_clock > 594000)
-		crtc_state->min_voltage_level = 2;
-	else if ((IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) &&
-		 crtc_state->port_clock > 594000)
-		crtc_state->min_voltage_level = 3;
-	else if (DISPLAY_VER(dev_priv) >= 11 && crtc_state->port_clock > 594000)
-		crtc_state->min_voltage_level = 1;
+	if (crtc_state->port_clock > 594000)
+		return 2;
+	else
+		return 0;
+}
+
+static int jsl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state)
+{
+	if (crtc_state->port_clock > 594000)
+		return 3;
+	else
+		return 0;
+}
+
+static int icl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state)
+{
+	if (crtc_state->port_clock > 594000)
+		return 1;
+	else
+		return 0;
+}
+
+void intel_ddi_compute_min_voltage_level(struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+
+	if (DISPLAY_VER(dev_priv) >= 14)
+		crtc_state->min_voltage_level = icl_ddi_min_voltage_level(crtc_state);
+	else if (DISPLAY_VER(dev_priv) >= 12)
+		crtc_state->min_voltage_level = tgl_ddi_min_voltage_level(crtc_state);
+	else if (IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv))
+		crtc_state->min_voltage_level = jsl_ddi_min_voltage_level(crtc_state);
+	else if (DISPLAY_VER(dev_priv) >= 11)
+		crtc_state->min_voltage_level = icl_ddi_min_voltage_level(crtc_state);
 }
 
 static enum transcoder bdw_transcoder_master_readout(struct drm_i915_private *dev_priv,
@@ -3895,7 +3921,7 @@ static void intel_ddi_get_config(struct intel_encoder *encoder,
 		pipe_config->lane_lat_optim_mask =
 			bxt_ddi_phy_get_lane_lat_optim_mask(encoder);
 
-	intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
+	intel_ddi_compute_min_voltage_level(pipe_config);
 
 	intel_hdmi_read_gcp_infoframe(encoder, pipe_config);
 
@@ -4175,7 +4201,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
 		pipe_config->lane_lat_optim_mask =
 			bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count);
 
-	intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
+	intel_ddi_compute_min_voltage_level(pipe_config);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h
index 63853a1f6582..434de7196875 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.h
+++ b/drivers/gpu/drm/i915/display/intel_ddi.h
@@ -70,8 +70,7 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state,
 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
 void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state,
 				    bool state);
-void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv,
-					 struct intel_crtc_state *crtc_state);
+void intel_ddi_compute_min_voltage_level(struct intel_crtc_state *crtc_state);
 int intel_ddi_toggle_hdcp_bits(struct intel_encoder *intel_encoder,
 			       enum transcoder cpu_transcoder,
 			       bool enable, u32 hdcp_mask);
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 5cf162628b95..b10aad15a63d 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2627,7 +2627,7 @@ static void intel_set_transcoder_timings(const struct intel_crtc_state *crtc_sta
 		crtc_vblank_start = 1;
 	}
 
-	if (DISPLAY_VER(dev_priv) > 3)
+	if (DISPLAY_VER(dev_priv) >= 4)
 		intel_de_write(dev_priv, TRANS_VSYNCSHIFT(cpu_transcoder),
 			       vsyncshift);
 
@@ -3167,7 +3167,7 @@ static void bdw_set_pipe_misc(const struct intel_crtc_state *crtc_state)
 		break;
 	case 36:
 		/* Port output 12BPC defined for ADLP+ */
-		if (DISPLAY_VER(dev_priv) > 12)
+		if (DISPLAY_VER(dev_priv) >= 13)
 			val |= PIPE_MISC_BPC_12_ADLP;
 		break;
 	default:
@@ -3224,7 +3224,7 @@ int bdw_get_pipe_misc_bpp(struct intel_crtc *crtc)
 	 * MIPI DSI HW readout.
 	 */
 	case PIPE_MISC_BPC_12_ADLP:
-		if (DISPLAY_VER(dev_priv) > 12)
+		if (DISPLAY_VER(dev_priv) >= 13)
 			return 36;
 		fallthrough;
 	default:
@@ -3746,8 +3746,8 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc,
 	if (!active)
 		goto out;
 
-	intel_dsc_get_config(pipe_config);
 	intel_bigjoiner_get_config(pipe_config);
+	intel_dsc_get_config(pipe_config);
 
 	if (!transcoder_is_dsi(pipe_config->cpu_transcoder) ||
 	    DISPLAY_VER(dev_priv) >= 11)
@@ -4923,6 +4923,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_X(name) do { \
 	if (current_config->name != pipe_config->name) { \
+		BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \
+				 __stringify(name) " is bool");	\
 		pipe_config_mismatch(fastset, crtc, __stringify(name), \
 				     "(expected 0x%08x, found 0x%08x)", \
 				     current_config->name, \
@@ -4933,6 +4935,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_X_WITH_MASK(name, mask) do { \
 	if ((current_config->name & (mask)) != (pipe_config->name & (mask))) { \
+		BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \
+				 __stringify(name) " is bool");	\
 		pipe_config_mismatch(fastset, crtc, __stringify(name), \
 				     "(expected 0x%08x, found 0x%08x)", \
 				     current_config->name & (mask), \
@@ -4943,6 +4947,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_I(name) do { \
 	if (current_config->name != pipe_config->name) { \
+		BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \
+				 __stringify(name) " is bool");	\
 		pipe_config_mismatch(fastset, crtc, __stringify(name), \
 				     "(expected %i, found %i)", \
 				     current_config->name, \
@@ -4953,6 +4959,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 
 #define PIPE_CONF_CHECK_BOOL(name) do { \
 	if (current_config->name != pipe_config->name) { \
+		BUILD_BUG_ON_MSG(!__same_type(current_config->name, bool), \
+				 __stringify(name) " is not bool");	\
 		pipe_config_mismatch(fastset, crtc,  __stringify(name), \
 				     "(expected %s, found %s)", \
 				     str_yes_no(current_config->name), \
@@ -5091,8 +5099,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 #define PIPE_CONF_QUIRK(quirk) \
 	((current_config->quirks | pipe_config->quirks) & (quirk))
 
-	PIPE_CONF_CHECK_I(hw.enable);
-	PIPE_CONF_CHECK_I(hw.active);
+	PIPE_CONF_CHECK_BOOL(hw.enable);
+	PIPE_CONF_CHECK_BOOL(hw.active);
 
 	PIPE_CONF_CHECK_I(cpu_transcoder);
 	PIPE_CONF_CHECK_I(mst_master_transcoder);
@@ -5301,8 +5309,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 	PIPE_CONF_CHECK_I(dsc.config.second_line_bpg_offset);
 	PIPE_CONF_CHECK_I(dsc.config.nsl_bpg_offset);
 
-	PIPE_CONF_CHECK_I(dsc.compression_enable);
-	PIPE_CONF_CHECK_I(dsc.dsc_split);
+	PIPE_CONF_CHECK_BOOL(dsc.compression_enable);
+	PIPE_CONF_CHECK_BOOL(dsc.dsc_split);
 	PIPE_CONF_CHECK_I(dsc.compressed_bpp_x16);
 
 	PIPE_CONF_CHECK_BOOL(splitter.enable);
@@ -5918,6 +5926,17 @@ static int intel_async_flip_check_uapi(struct intel_atomic_state *state,
 		return -EINVAL;
 	}
 
+	/*
+	 * FIXME: Bigjoiner+async flip is busted currently.
+	 * Remove this check once the issues are fixed.
+	 */
+	if (new_crtc_state->bigjoiner_pipes) {
+		drm_dbg_kms(&i915->drm,
+			    "[CRTC:%d:%s] async flip disallowed with bigjoiner\n",
+			    crtc->base.base.id, crtc->base.name);
+		return -EINVAL;
+	}
+
 	for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state,
 					     new_plane_state, i) {
 		if (plane->pipe != crtc->pipe)
@@ -6955,24 +6974,6 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 	drm_WARN_ON(&dev_priv->drm, update_pipes);
 }
 
-static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
-{
-	struct intel_atomic_state *state, *next;
-	struct llist_node *freed;
-
-	freed = llist_del_all(&dev_priv->display.atomic_helper.free_list);
-	llist_for_each_entry_safe(state, next, freed, freed)
-		drm_atomic_state_put(&state->base);
-}
-
-void intel_atomic_helper_free_state_worker(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), display.atomic_helper.free_work);
-
-	intel_atomic_helper_free_state(dev_priv);
-}
-
 static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_state)
 {
 	struct drm_i915_private *i915 = to_i915(intel_state->base.dev);
@@ -7008,8 +7009,6 @@ static void intel_atomic_cleanup_work(struct work_struct *work)
 	drm_atomic_helper_cleanup_planes(&i915->drm, &state->base);
 	drm_atomic_helper_commit_cleanup_done(&state->base);
 	drm_atomic_state_put(&state->base);
-
-	intel_atomic_helper_free_state(i915);
 }
 
 static void intel_atomic_prepare_plane_clear_colors(struct intel_atomic_state *state)
@@ -7354,7 +7353,7 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state,
 		for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i)
 			intel_color_cleanup_commit(new_crtc_state);
 
-		drm_atomic_helper_cleanup_planes(dev, &state->base);
+		drm_atomic_helper_unprepare_planes(dev, &state->base);
 		intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref);
 		return ret;
 	}
@@ -7734,6 +7733,16 @@ enum drm_mode_status intel_mode_valid(struct drm_device *dev,
 	    mode->vtotal > vtotal_max)
 		return MODE_V_ILLEGAL;
 
+	return MODE_OK;
+}
+
+enum drm_mode_status intel_cpu_transcoder_mode_valid(struct drm_i915_private *dev_priv,
+						     const struct drm_display_mode *mode)
+{
+	/*
+	 * Additional transcoder timing limits,
+	 * excluding BXT/GLK DSI transcoders.
+	 */
 	if (DISPLAY_VER(dev_priv) >= 5) {
 		if (mode->hdisplay < 64 ||
 		    mode->htotal - mode->hdisplay < 32)
@@ -7753,7 +7762,7 @@ enum drm_mode_status intel_mode_valid(struct drm_device *dev,
 	 * Cantiga+ cannot handle modes with a hsync front porch of 0.
 	 * WaPruneModeWithIncorrectHsyncOffset:ctg,elk,ilk,snb,ivb,vlv,hsw.
 	 */
-	if ((DISPLAY_VER(dev_priv) > 4 || IS_G4X(dev_priv)) &&
+	if ((DISPLAY_VER(dev_priv) >= 5 || IS_G4X(dev_priv)) &&
 	    mode->hsync_start == mode->hdisplay)
 		return MODE_H_ILLEGAL;
 
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 8548f49e3972..f4a0773f0fca 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -402,6 +402,9 @@ enum drm_mode_status
 intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv,
 				const struct drm_display_mode *mode,
 				bool bigjoiner);
+enum drm_mode_status
+intel_cpu_transcoder_mode_valid(struct drm_i915_private *i915,
+				const struct drm_display_mode *mode);
 enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port);
 bool is_trans_port_sync_mode(const struct intel_crtc_state *state);
 bool is_trans_port_sync_master(const struct intel_crtc_state *state);
diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h
index 7e82b87e9cde..47297ed85822 100644
--- a/drivers/gpu/drm/i915/display/intel_display_core.h
+++ b/drivers/gpu/drm/i915/display/intel_display_core.h
@@ -298,12 +298,6 @@ struct intel_display {
 		const struct intel_audio_funcs *audio;
 	} funcs;
 
-	/* Grouping using anonymous structs. Keep sorted. */
-	struct intel_atomic_helper {
-		struct llist_head free_list;
-		struct work_struct free_work;
-	} atomic_helper;
-
 	struct {
 		/* backlight registers and fields in struct intel_panel */
 		struct mutex lock;
diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
index 915420d0cef8..d951edb36687 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
@@ -1095,7 +1095,7 @@ void intel_display_debugfs_register(struct drm_i915_private *i915)
 
 	for (i = 0; i < ARRAY_SIZE(intel_display_debugfs_files); i++) {
 		debugfs_create_file(intel_display_debugfs_files[i].name,
-				    S_IRUGO | S_IWUSR,
+				    0644,
 				    minor->debugfs_root,
 				    to_i915(minor->dev),
 				    intel_display_debugfs_files[i].fops);
@@ -1116,11 +1116,10 @@ void intel_display_debugfs_register(struct drm_i915_private *i915)
 
 static int i915_panel_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct intel_dp *intel_dp =
-		intel_attached_dp(to_intel_connector(connector));
+	struct intel_connector *connector = m->private;
+	struct intel_dp *intel_dp = intel_attached_dp(connector);
 
-	if (connector->status != connector_status_connected)
+	if (connector->base.status != connector_status_connected)
 		return -ENODEV;
 
 	seq_printf(m, "Panel power up delay: %d\n",
@@ -1138,23 +1137,23 @@ DEFINE_SHOW_ATTRIBUTE(i915_panel);
 
 static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_i915_private *i915 = to_i915(connector->dev);
-	struct intel_connector *intel_connector = to_intel_connector(connector);
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	int ret;
 
 	ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex);
 	if (ret)
 		return ret;
 
-	if (!connector->encoder || connector->status != connector_status_connected) {
+	if (!connector->base.encoder ||
+	    connector->base.status != connector_status_connected) {
 		ret = -ENODEV;
 		goto out;
 	}
 
-	seq_printf(m, "%s:%d HDCP version: ", connector->name,
-		   connector->base.id);
-	intel_hdcp_info(m, intel_connector);
+	seq_printf(m, "%s:%d HDCP version: ", connector->base.name,
+		   connector->base.base.id);
+	intel_hdcp_info(m, connector);
 
 out:
 	drm_modeset_unlock(&i915->drm.mode_config.connection_mutex);
@@ -1165,16 +1164,16 @@ DEFINE_SHOW_ATTRIBUTE(i915_hdcp_sink_capability);
 
 static int i915_lpsp_capability_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_i915_private *i915 = to_i915(connector->dev);
-	struct intel_encoder *encoder;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
+	int connector_type = connector->base.connector_type;
 	bool lpsp_capable = false;
 
-	encoder = intel_attached_encoder(to_intel_connector(connector));
 	if (!encoder)
 		return -ENODEV;
 
-	if (connector->status != connector_status_connected)
+	if (connector->base.status != connector_status_connected)
 		return -ENODEV;
 
 	if (DISPLAY_VER(i915) >= 13)
@@ -1187,15 +1186,15 @@ static int i915_lpsp_capability_show(struct seq_file *m, void *data)
 		 */
 		lpsp_capable = encoder->port <= PORT_B;
 	else if (DISPLAY_VER(i915) == 11)
-		lpsp_capable = (connector->connector_type == DRM_MODE_CONNECTOR_DSI ||
-				connector->connector_type == DRM_MODE_CONNECTOR_eDP);
+		lpsp_capable = (connector_type == DRM_MODE_CONNECTOR_DSI ||
+				connector_type == DRM_MODE_CONNECTOR_eDP);
 	else if (IS_DISPLAY_VER(i915, 9, 10))
 		lpsp_capable = (encoder->port == PORT_A &&
-				(connector->connector_type == DRM_MODE_CONNECTOR_DSI ||
-				 connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
-				 connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort));
+				(connector_type == DRM_MODE_CONNECTOR_DSI ||
+				 connector_type == DRM_MODE_CONNECTOR_eDP ||
+				 connector_type == DRM_MODE_CONNECTOR_DisplayPort));
 	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
-		lpsp_capable = connector->connector_type == DRM_MODE_CONNECTOR_eDP;
+		lpsp_capable = connector_type == DRM_MODE_CONNECTOR_eDP;
 
 	seq_printf(m, "LPSP: %s\n", lpsp_capable ? "capable" : "incapable");
 
@@ -1205,7 +1204,7 @@ DEFINE_SHOW_ATTRIBUTE(i915_lpsp_capability);
 
 static int i915_dsc_fec_support_show(struct seq_file *m, void *data)
 {
-	struct intel_connector *connector = to_intel_connector(m->private);
+	struct intel_connector *connector = m->private;
 	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	struct drm_crtc *crtc;
 	struct intel_dp *intel_dp;
@@ -1275,13 +1274,13 @@ static ssize_t i915_dsc_fec_support_write(struct file *file,
 					  const char __user *ubuf,
 					  size_t len, loff_t *offp)
 {
+	struct seq_file *m = file->private_data;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	bool dsc_enable = false;
 	int ret;
-	struct drm_connector *connector =
-		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
-	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
-	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 	if (len == 0)
 		return 0;
@@ -1319,22 +1318,22 @@ static const struct file_operations i915_dsc_fec_support_fops = {
 
 static int i915_dsc_bpc_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_device *dev = connector->dev;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct drm_crtc *crtc;
 	struct intel_crtc_state *crtc_state;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
 	int ret;
 
 	if (!encoder)
 		return -ENODEV;
 
-	ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex);
+	ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex);
 	if (ret)
 		return ret;
 
-	crtc = connector->state->crtc;
-	if (connector->status != connector_status_connected || !crtc) {
+	crtc = connector->base.state->crtc;
+	if (connector->base.status != connector_status_connected || !crtc) {
 		ret = -ENODEV;
 		goto out;
 	}
@@ -1342,7 +1341,7 @@ static int i915_dsc_bpc_show(struct seq_file *m, void *data)
 	crtc_state = to_intel_crtc_state(crtc->state);
 	seq_printf(m, "Input_BPC: %d\n", crtc_state->dsc.config.bits_per_component);
 
-out:	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+out:	drm_modeset_unlock(&i915->drm.mode_config.connection_mutex);
 
 	return ret;
 }
@@ -1351,9 +1350,9 @@ static ssize_t i915_dsc_bpc_write(struct file *file,
 				  const char __user *ubuf,
 				  size_t len, loff_t *offp)
 {
-	struct drm_connector *connector =
-		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
+	struct seq_file *m = file->private_data;
+	struct intel_connector *connector = m->private;
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	int dsc_bpc = 0;
 	int ret;
@@ -1385,22 +1384,22 @@ static const struct file_operations i915_dsc_bpc_fops = {
 
 static int i915_dsc_output_format_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_device *dev = connector->dev;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct drm_crtc *crtc;
 	struct intel_crtc_state *crtc_state;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
 	int ret;
 
 	if (!encoder)
 		return -ENODEV;
 
-	ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex);
+	ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex);
 	if (ret)
 		return ret;
 
-	crtc = connector->state->crtc;
-	if (connector->status != connector_status_connected || !crtc) {
+	crtc = connector->base.state->crtc;
+	if (connector->base.status != connector_status_connected || !crtc) {
 		ret = -ENODEV;
 		goto out;
 	}
@@ -1409,7 +1408,7 @@ static int i915_dsc_output_format_show(struct seq_file *m, void *data)
 	seq_printf(m, "DSC_Output_Format: %s\n",
 		   intel_output_format_name(crtc_state->output_format));
 
-out:	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+out:	drm_modeset_unlock(&i915->drm.mode_config.connection_mutex);
 
 	return ret;
 }
@@ -1418,9 +1417,9 @@ static ssize_t i915_dsc_output_format_write(struct file *file,
 					    const char __user *ubuf,
 					    size_t len, loff_t *offp)
 {
-	struct drm_connector *connector =
-		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
+	struct seq_file *m = file->private_data;
+	struct intel_connector *connector = m->private;
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	int dsc_output_format = 0;
 	int ret;
@@ -1452,33 +1451,32 @@ static const struct file_operations i915_dsc_output_format_fops = {
 
 static int i915_dsc_fractional_bpp_show(struct seq_file *m, void *data)
 {
-	struct drm_connector *connector = m->private;
-	struct drm_device *dev = connector->dev;
+	struct intel_connector *connector = m->private;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
 	struct drm_crtc *crtc;
 	struct intel_dp *intel_dp;
-	struct intel_connector *intel_connector = to_intel_connector(connector);
-	struct intel_encoder *encoder = intel_attached_encoder(intel_connector);
 	int ret;
 
 	if (!encoder)
 		return -ENODEV;
 
-	ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex);
+	ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex);
 	if (ret)
 		return ret;
 
-	crtc = connector->state->crtc;
-	if (connector->status != connector_status_connected || !crtc) {
+	crtc = connector->base.state->crtc;
+	if (connector->base.status != connector_status_connected || !crtc) {
 		ret = -ENODEV;
 		goto out;
 	}
 
-	intel_dp = intel_attached_dp(intel_connector);
+	intel_dp = intel_attached_dp(connector);
 	seq_printf(m, "Force_DSC_Fractional_BPP_Enable: %s\n",
 		   str_yes_no(intel_dp->force_dsc_fractional_bpp_en));
 
 out:
-	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+	drm_modeset_unlock(&i915->drm.mode_config.connection_mutex);
 
 	return ret;
 }
@@ -1487,10 +1485,10 @@ static ssize_t i915_dsc_fractional_bpp_write(struct file *file,
 					     const char __user *ubuf,
 					     size_t len, loff_t *offp)
 {
-	struct drm_connector *connector =
-		((struct seq_file *)file->private_data)->private;
-	struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector));
-	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	struct seq_file *m = file->private_data;
+	struct intel_connector *connector = m->private;
+	struct intel_encoder *encoder = intel_attached_encoder(connector);
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	bool dsc_fractional_bpp_enable = false;
 	int ret;
@@ -1565,39 +1563,38 @@ DEFINE_SHOW_ATTRIBUTE(intel_crtc_pipe);
 
 /**
  * intel_connector_debugfs_add - add i915 specific connector debugfs files
- * @intel_connector: pointer to a registered drm_connector
+ * @connector: pointer to a registered intel_connector
  *
  * Cleanup will be done by drm_connector_unregister() through a call to
  * drm_debugfs_connector_remove().
  */
-void intel_connector_debugfs_add(struct intel_connector *intel_connector)
+void intel_connector_debugfs_add(struct intel_connector *connector)
 {
-	struct drm_connector *connector = &intel_connector->base;
-	struct dentry *root = connector->debugfs_entry;
-	struct drm_i915_private *dev_priv = to_i915(connector->dev);
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	struct dentry *root = connector->base.debugfs_entry;
+	int connector_type = connector->base.connector_type;
 
 	/* The connector must have been registered beforehands. */
 	if (!root)
 		return;
 
-	intel_drrs_connector_debugfs_add(intel_connector);
-	intel_psr_connector_debugfs_add(intel_connector);
+	intel_drrs_connector_debugfs_add(connector);
+	intel_psr_connector_debugfs_add(connector);
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
-		debugfs_create_file("i915_panel_timings", S_IRUGO, root,
+	if (connector_type == DRM_MODE_CONNECTOR_eDP)
+		debugfs_create_file("i915_panel_timings", 0444, root,
 				    connector, &i915_panel_fops);
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) {
-		debugfs_create_file("i915_hdcp_sink_capability", S_IRUGO, root,
+	if (connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIB) {
+		debugfs_create_file("i915_hdcp_sink_capability", 0444, root,
 				    connector, &i915_hdcp_sink_capability_fops);
 	}
 
-	if (DISPLAY_VER(dev_priv) >= 11 &&
-	    ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort &&
-	    !to_intel_connector(connector)->mst_port) ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_eDP)) {
+	if (DISPLAY_VER(i915) >= 11 &&
+	    ((connector_type == DRM_MODE_CONNECTOR_DisplayPort && !connector->mst_port) ||
+	     connector_type == DRM_MODE_CONNECTOR_eDP)) {
 		debugfs_create_file("i915_dsc_fec_support", 0644, root,
 				    connector, &i915_dsc_fec_support_fops);
 
@@ -1611,11 +1608,11 @@ void intel_connector_debugfs_add(struct intel_connector *intel_connector)
 				    connector, &i915_dsc_fractional_bpp_fops);
 	}
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_DSI ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_HDMIB)
+	if (connector_type == DRM_MODE_CONNECTOR_DSI ||
+	    connector_type == DRM_MODE_CONNECTOR_eDP ||
+	    connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIB)
 		debugfs_create_file("i915_lpsp_capability", 0444, root,
 				    connector, &i915_lpsp_capability_fops);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h
index 4299cc452e05..fe4268813786 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.h
+++ b/drivers/gpu/drm/i915/display/intel_display_device.h
@@ -36,7 +36,7 @@ struct drm_printer;
 #define HAS_ASYNC_FLIPS(i915)		(DISPLAY_VER(i915) >= 5)
 #define HAS_CDCLK_CRAWL(i915)		(DISPLAY_INFO(i915)->has_cdclk_crawl)
 #define HAS_CDCLK_SQUASH(i915)		(DISPLAY_INFO(i915)->has_cdclk_squash)
-#define HAS_CUR_FBC(i915)		(!HAS_GMCH(i915) && DISPLAY_VER(i915) >= 7)
+#define HAS_CUR_FBC(i915)		(!HAS_GMCH(i915) && IS_DISPLAY_VER(i915, 7, 13))
 #define HAS_D12_PLANE_MINIMIZATION(i915) (IS_ROCKETLAKE(i915) || IS_ALDERLAKE_S(i915))
 #define HAS_DDI(i915)			(DISPLAY_INFO(i915)->has_ddi)
 #define HAS_DISPLAY(i915)		(DISPLAY_RUNTIME_INFO(i915)->pipe_mask != 0)
@@ -49,7 +49,7 @@ struct drm_printer;
 #define HAS_DSC(__i915)			(DISPLAY_RUNTIME_INFO(__i915)->has_dsc)
 #define HAS_FBC(i915)			(DISPLAY_RUNTIME_INFO(i915)->fbc_mask != 0)
 #define HAS_FPGA_DBG_UNCLAIMED(i915)	(DISPLAY_INFO(i915)->has_fpga_dbg)
-#define HAS_FW_BLC(i915)		(DISPLAY_VER(i915) > 2)
+#define HAS_FW_BLC(i915)		(DISPLAY_VER(i915) >= 3)
 #define HAS_GMBUS_IRQ(i915)		(DISPLAY_VER(i915) >= 4)
 #define HAS_GMBUS_BURST_READ(i915)	(DISPLAY_VER(i915) >= 10 || IS_KABYLAKE(i915))
 #define HAS_GMCH(i915)			(DISPLAY_INFO(i915)->has_gmch)
diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
index 62f7b10484be..9df9097a0255 100644
--- a/drivers/gpu/drm/i915/display/intel_display_driver.c
+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
@@ -259,10 +259,6 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915)
 	if (ret)
 		goto cleanup_vga_client_pw_domain_dmc;
 
-	init_llist_head(&i915->display.atomic_helper.free_list);
-	INIT_WORK(&i915->display.atomic_helper.free_work,
-		  intel_atomic_helper_free_state_worker);
-
 	intel_init_quirks(i915);
 
 	intel_fbc_init(i915);
@@ -430,9 +426,6 @@ void intel_display_driver_remove(struct drm_i915_private *i915)
 	flush_workqueue(i915->display.wq.flip);
 	flush_workqueue(i915->display.wq.modeset);
 
-	flush_work(&i915->display.atomic_helper.free_work);
-	drm_WARN_ON(&i915->drm, !llist_empty(&i915->display.atomic_helper.free_list));
-
 	/*
 	 * MST topology needs to be suspended so we don't have any calls to
 	 * fbdev after it's finalized. MST will be destroyed later as part of
diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c
index bff4a76310c0..a7d8f3fc98de 100644
--- a/drivers/gpu/drm/i915/display/intel_display_irq.c
+++ b/drivers/gpu/drm/i915/display/intel_display_irq.c
@@ -340,18 +340,15 @@ static void flip_done_handler(struct drm_i915_private *i915,
 			      enum pipe pipe)
 {
 	struct intel_crtc *crtc = intel_crtc_for_pipe(i915, pipe);
-	struct drm_crtc_state *crtc_state = crtc->base.state;
-	struct drm_pending_vblank_event *e = crtc_state->event;
-	struct drm_device *dev = &i915->drm;
-	unsigned long irqflags;
-
-	spin_lock_irqsave(&dev->event_lock, irqflags);
 
-	crtc_state->event = NULL;
+	spin_lock(&i915->drm.event_lock);
 
-	drm_crtc_send_vblank_event(&crtc->base, e);
+	if (crtc->flip_done_event) {
+		drm_crtc_send_vblank_event(&crtc->base, crtc->flip_done_event);
+		crtc->flip_done_event = NULL;
+	}
 
-	spin_unlock_irqrestore(&dev->event_lock, irqflags);
+	spin_unlock(&i915->drm.event_lock);
 }
 
 static void hsw_pipe_crc_irq_handler(struct drm_i915_private *dev_priv,
@@ -896,7 +893,7 @@ gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir)
 	}
 
 	if (!found)
-		drm_err(&dev_priv->drm, "Unexpected DE Misc interrupt\n");
+		drm_err(&dev_priv->drm, "Unexpected DE Misc interrupt: 0x%08x\n", iir);
 }
 
 static void gen11_dsi_te_interrupt_handler(struct drm_i915_private *dev_priv,
@@ -1653,7 +1650,7 @@ void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
 	else if (HAS_PCH_SPLIT(dev_priv))
 		ibx_irq_postinstall(dev_priv);
 
-	if (DISPLAY_VER(dev_priv) <= 10)
+	if (DISPLAY_VER(dev_priv) < 11)
 		de_misc_masked |= GEN8_DE_MISC_GSE;
 
 	if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv))
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index e390595d7341..6fd4fa52253a 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -405,8 +405,8 @@ print_async_put_domains_state(struct i915_power_domains *power_domains)
 						     struct drm_i915_private,
 						     display.power.domains);
 
-	drm_dbg(&i915->drm, "async_put_wakeref %u\n",
-		power_domains->async_put_wakeref);
+	drm_dbg(&i915->drm, "async_put_wakeref: %s\n",
+		str_yes_no(power_domains->async_put_wakeref));
 
 	print_power_domains(power_domains, "async_put_domains[0]",
 			    &power_domains->async_put_domains[0]);
@@ -1697,14 +1697,14 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv,
 	if (resume)
 		intel_dmc_load_program(dev_priv);
 
-	/* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p */
-	if (DISPLAY_VER(dev_priv) >= 12)
+	/* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p,dg2 */
+	if (IS_DISPLAY_IP_RANGE(dev_priv, IP_VER(12, 0), IP_VER(13, 0)))
 		intel_de_rmw(dev_priv, GEN11_CHICKEN_DCPR_2, 0,
 			     DCPR_CLEAR_MEMSTAT_DIS | DCPR_SEND_RESP_IMM |
 			     DCPR_MASK_LPMODE | DCPR_MASK_MAXLATENCY_MEMUP_CLR);
 
 	/* Wa_14011503030:xelpd */
-	if (DISPLAY_VER(dev_priv) >= 13)
+	if (DISPLAY_VER(dev_priv) == 13)
 		intel_de_write(dev_priv, XELPD_DISPLAY_ERR_FATAL_MASK, ~0);
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index b3e942f2eeb0..3fdd8a517983 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -684,8 +684,6 @@ struct intel_atomic_state {
 	bool skip_intermediate_wm;
 
 	bool rps_interactive;
-
-	struct llist_node freed;
 };
 
 struct intel_plane_state {
@@ -1022,7 +1020,6 @@ struct intel_c10pll_state {
 };
 
 struct intel_c20pll_state {
-	u32 link_bit_rate;
 	u32 clock; /* in kHz */
 	u16 tx[3];
 	u16 cmn[4];
@@ -1476,6 +1473,9 @@ struct intel_crtc {
 
 	struct intel_crtc_state *config;
 
+	/* armed event for async flip */
+	struct drm_pending_vblank_event *flip_done_event;
+
 	/* Access to these should be protected by dev_priv->irq_lock. */
 	bool cpu_fifo_underrun_disabled;
 	bool pch_fifo_underrun_disabled;
diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c
index 63e080e07023..b70502586ab9 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc.c
+++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -335,77 +335,6 @@ static void disable_event_handler(struct drm_i915_private *i915,
 	intel_de_write(i915, htp_reg, 0);
 }
 
-static void
-disable_flip_queue_event(struct drm_i915_private *i915,
-			 i915_reg_t ctl_reg, i915_reg_t htp_reg)
-{
-	u32 event_ctl;
-	u32 event_htp;
-
-	event_ctl = intel_de_read(i915, ctl_reg);
-	event_htp = intel_de_read(i915, htp_reg);
-	if (event_ctl != (DMC_EVT_CTL_ENABLE |
-			  DMC_EVT_CTL_RECURRING |
-			  REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK,
-					 DMC_EVT_CTL_TYPE_EDGE_0_1) |
-			  REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK,
-					 DMC_EVT_CTL_EVENT_ID_CLK_MSEC)) ||
-	    !event_htp) {
-		drm_dbg_kms(&i915->drm,
-			    "Unexpected DMC event configuration (control %08x htp %08x)\n",
-			    event_ctl, event_htp);
-		return;
-	}
-
-	disable_event_handler(i915, ctl_reg, htp_reg);
-}
-
-static bool
-get_flip_queue_event_regs(struct drm_i915_private *i915, enum intel_dmc_id dmc_id,
-			  i915_reg_t *ctl_reg, i915_reg_t *htp_reg)
-{
-	if (dmc_id == DMC_FW_MAIN) {
-		if (DISPLAY_VER(i915) == 12) {
-			*ctl_reg = DMC_EVT_CTL(i915, dmc_id, 3);
-			*htp_reg = DMC_EVT_HTP(i915, dmc_id, 3);
-
-			return true;
-		}
-	} else if (dmc_id >= DMC_FW_PIPEA && dmc_id <= DMC_FW_PIPED) {
-		if (IS_DG2(i915)) {
-			*ctl_reg = DMC_EVT_CTL(i915, dmc_id, 2);
-			*htp_reg = DMC_EVT_HTP(i915, dmc_id, 2);
-
-			return true;
-		}
-	}
-
-	return false;
-}
-
-static void
-disable_all_flip_queue_events(struct drm_i915_private *i915)
-{
-	enum intel_dmc_id dmc_id;
-
-	/* TODO: check if the following applies to all D13+ platforms. */
-	if (!IS_DG2(i915) && !IS_TIGERLAKE(i915))
-		return;
-
-	for_each_dmc_id(dmc_id) {
-		i915_reg_t ctl_reg;
-		i915_reg_t htp_reg;
-
-		if (!has_dmc_id_fw(i915, dmc_id))
-			continue;
-
-		if (!get_flip_queue_event_regs(i915, dmc_id, &ctl_reg, &htp_reg))
-			continue;
-
-		disable_flip_queue_event(i915, ctl_reg, htp_reg);
-	}
-}
-
 static void disable_all_event_handlers(struct drm_i915_private *i915)
 {
 	enum intel_dmc_id dmc_id;
@@ -493,6 +422,65 @@ void intel_dmc_disable_pipe(struct drm_i915_private *i915, enum pipe pipe)
 		intel_de_rmw(i915, PIPEDMC_CONTROL(pipe), PIPEDMC_ENABLE, 0);
 }
 
+static bool is_dmc_evt_ctl_reg(struct drm_i915_private *i915,
+			       enum intel_dmc_id dmc_id, i915_reg_t reg)
+{
+	u32 offset = i915_mmio_reg_offset(reg);
+	u32 start = i915_mmio_reg_offset(DMC_EVT_CTL(i915, dmc_id, 0));
+	u32 end = i915_mmio_reg_offset(DMC_EVT_CTL(i915, dmc_id, DMC_EVENT_HANDLER_COUNT_GEN12));
+
+	return offset >= start && offset < end;
+}
+
+static bool is_dmc_evt_htp_reg(struct drm_i915_private *i915,
+			       enum intel_dmc_id dmc_id, i915_reg_t reg)
+{
+	u32 offset = i915_mmio_reg_offset(reg);
+	u32 start = i915_mmio_reg_offset(DMC_EVT_HTP(i915, dmc_id, 0));
+	u32 end = i915_mmio_reg_offset(DMC_EVT_HTP(i915, dmc_id, DMC_EVENT_HANDLER_COUNT_GEN12));
+
+	return offset >= start && offset < end;
+}
+
+static bool disable_dmc_evt(struct drm_i915_private *i915,
+			    enum intel_dmc_id dmc_id,
+			    i915_reg_t reg, u32 data)
+{
+	if (!is_dmc_evt_ctl_reg(i915, dmc_id, reg))
+		return false;
+
+	/* keep all pipe DMC events disabled by default */
+	if (dmc_id != DMC_FW_MAIN)
+		return true;
+
+	/* also disable the flip queue event on the main DMC on TGL */
+	if (IS_TIGERLAKE(i915) &&
+	    REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_CLK_MSEC)
+		return true;
+
+	/* also disable the HRR event on the main DMC on TGL/ADLS */
+	if ((IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915)) &&
+	    REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_VBLANK_A)
+		return true;
+
+	return false;
+}
+
+static u32 dmc_mmiodata(struct drm_i915_private *i915,
+			struct intel_dmc *dmc,
+			enum intel_dmc_id dmc_id, int i)
+{
+	if (disable_dmc_evt(i915, dmc_id,
+			    dmc->dmc_info[dmc_id].mmioaddr[i],
+			    dmc->dmc_info[dmc_id].mmiodata[i]))
+		return REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK,
+				      DMC_EVT_CTL_TYPE_EDGE_0_1) |
+			REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK,
+				       DMC_EVT_CTL_EVENT_ID_FALSE);
+	else
+		return dmc->dmc_info[dmc_id].mmiodata[i];
+}
+
 /**
  * intel_dmc_load_program() - write the firmware from memory to register.
  * @i915: i915 drm device.
@@ -532,7 +520,7 @@ void intel_dmc_load_program(struct drm_i915_private *i915)
 	for_each_dmc_id(dmc_id) {
 		for (i = 0; i < dmc->dmc_info[dmc_id].mmio_count; i++) {
 			intel_de_write(i915, dmc->dmc_info[dmc_id].mmioaddr[i],
-				       dmc->dmc_info[dmc_id].mmiodata[i]);
+				       dmc_mmiodata(i915, dmc, dmc_id, i));
 		}
 	}
 
@@ -540,13 +528,6 @@ void intel_dmc_load_program(struct drm_i915_private *i915)
 
 	gen9_set_dc_state_debugmask(i915);
 
-	/*
-	 * Flip queue events need to be disabled before enabling DC5/6.
-	 * i915 doesn't use the flip queue feature, so disable it already
-	 * here.
-	 */
-	disable_all_flip_queue_events(i915);
-
 	pipedmc_clock_gating_wa(i915, false);
 }
 
@@ -742,9 +723,17 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
 		return 0;
 	}
 
+	drm_dbg_kms(&i915->drm, "DMC %d:\n", dmc_id);
 	for (i = 0; i < mmio_count; i++) {
 		dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]);
 		dmc_info->mmiodata[i] = mmiodata[i];
+
+		drm_dbg_kms(&i915->drm, " mmio[%d]: 0x%x = 0x%x%s%s\n",
+			    i, mmioaddr[i], mmiodata[i],
+			    is_dmc_evt_ctl_reg(i915, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_CTL)" :
+			    is_dmc_evt_htp_reg(i915, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_HTP)" : "",
+			    disable_dmc_evt(i915, dmc_id, dmc_info->mmioaddr[i],
+					    dmc_info->mmiodata[i]) ? " (disabling)" : "");
 	}
 	dmc_info->mmio_count = mmio_count;
 	dmc_info->start_mmioaddr = start_mmioaddr;
diff --git a/drivers/gpu/drm/i915/display/intel_dmc_regs.h b/drivers/gpu/drm/i915/display/intel_dmc_regs.h
index cf10094acae3..90d0dbb41cfe 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_dmc_regs.h
@@ -60,6 +60,7 @@
 
 #define DMC_EVT_CTL_EVENT_ID_MASK	REG_GENMASK(15, 8)
 #define DMC_EVT_CTL_EVENT_ID_FALSE	0x01
+#define DMC_EVT_CTL_EVENT_ID_VBLANK_A	0x32 /* main DMC */
 /* An event handler scheduled to run at a 1 kHz frequency. */
 #define DMC_EVT_CTL_EVENT_ID_CLK_MSEC	0xbf
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 1422c2370269..c3b906ebe542 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1227,6 +1227,10 @@ intel_dp_mode_valid(struct drm_connector *_connector,
 	enum drm_mode_status status;
 	bool dsc = false, bigjoiner = false;
 
+	status = intel_cpu_transcoder_mode_valid(dev_priv, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
 		return MODE_H_ILLEGAL;
 
@@ -1886,7 +1890,7 @@ static int dsc_src_max_compressed_bpp(struct intel_dp *intel_dp)
 	 * Max Compressed bpp for Gen 13+ is 27bpp.
 	 * For earlier platform is 23bpp. (Bspec:49259).
 	 */
-	if (DISPLAY_VER(i915) <= 12)
+	if (DISPLAY_VER(i915) < 13)
 		return 23;
 	else
 		return 27;
@@ -2097,7 +2101,7 @@ static int intel_dp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp,
 		}
 	}
 
-	dsc_max_bpc = intel_dp_dsc_min_src_input_bpc(i915);
+	dsc_max_bpc = intel_dp_dsc_max_src_input_bpc(i915);
 	if (!dsc_max_bpc)
 		return -EINVAL;
 
@@ -2844,19 +2848,12 @@ intel_dp_audio_compute_config(struct intel_encoder *encoder,
 			      struct intel_crtc_state *pipe_config,
 			      struct drm_connector_state *conn_state)
 {
-	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
-	struct drm_connector *connector = conn_state->connector;
-
 	pipe_config->has_audio =
 		intel_dp_has_audio(encoder, pipe_config, conn_state) &&
 		intel_audio_compute_config(encoder, pipe_config, conn_state);
 
 	pipe_config->sdp_split_enable = pipe_config->has_audio &&
 					intel_dp_is_uhbr(pipe_config);
-
-	drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] SDP split enable: %s\n",
-		    connector->base.id, connector->name,
-		    str_yes_no(pipe_config->sdp_split_enable));
 }
 
 int
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index dbc1b66c8ee4..1abfafbbfa75 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -650,19 +650,30 @@ intel_dp_update_link_bw_set(struct intel_dp *intel_dp,
 			    const struct intel_crtc_state *crtc_state,
 			    u8 link_bw, u8 rate_select)
 {
-	u8 link_config[2];
+	u8 lane_count = crtc_state->lane_count;
 
-	/* Write the link configuration data */
-	link_config[0] = link_bw;
-	link_config[1] = crtc_state->lane_count;
 	if (crtc_state->enhanced_framing)
-		link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
-	drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2);
+		lane_count |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+
+	if (link_bw) {
+		/* DP and eDP v1.3 and earlier link bw set method. */
+		u8 link_config[] = { link_bw, lane_count };
 
-	/* eDP 1.4 rate select method. */
-	if (!link_bw)
-		drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET,
-				  &rate_select, 1);
+		drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config,
+				  ARRAY_SIZE(link_config));
+	} else {
+		/*
+		 * eDP v1.4 and later link rate set method.
+		 *
+		 * eDP v1.4x sinks shall ignore DP_LINK_RATE_SET if
+		 * DP_LINK_BW_SET is set. Avoid writing DP_LINK_BW_SET.
+		 *
+		 * eDP v1.5 sinks allow choosing either, and the last choice
+		 * shall be active.
+		 */
+		drm_dp_dpcd_writeb(&intel_dp->aux, DP_LANE_COUNT_SET, lane_count);
+		drm_dp_dpcd_writeb(&intel_dp->aux, DP_LINK_RATE_SET, rate_select);
+	}
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 63364c9602ef..8a9432335030 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -54,7 +54,7 @@ static int intel_dp_mst_check_constraints(struct drm_i915_private *i915, int bpp
 					  struct intel_crtc_state *crtc_state,
 					  bool dsc)
 {
-	if (intel_dp_is_uhbr(crtc_state) && DISPLAY_VER(i915) <= 13 && dsc) {
+	if (intel_dp_is_uhbr(crtc_state) && DISPLAY_VER(i915) < 14 && dsc) {
 		int output_bpp = bpp;
 		/* DisplayPort 2 128b/132b, bits per lane is always 32 */
 		int symbol_clock = crtc_state->port_clock / 32;
@@ -614,7 +614,7 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 
 	intel_dp_audio_compute_config(encoder, pipe_config, conn_state);
 
-	intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
+	intel_ddi_compute_min_voltage_level(pipe_config);
 
 	intel_psr_compute_config(intel_dp, pipe_config, conn_state);
 
@@ -1282,6 +1282,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
 		return 0;
 	}
 
+	*status = intel_cpu_transcoder_mode_valid(dev_priv, mode);
+	if (*status != MODE_OK)
+		return 0;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN) {
 		*status = MODE_NO_DBLESCAN;
 		return 0;
@@ -1328,6 +1332,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
 	if (intel_dp_need_bigjoiner(intel_dp, mode->hdisplay, target_clock)) {
 		bigjoiner = true;
 		max_dotclk *= 2;
+
+		/* TODO: add support for bigjoiner */
+		*status = MODE_CLOCK_HIGH;
+		return 0;
 	}
 
 	if (DISPLAY_VER(dev_priv) >= 10 &&
@@ -1362,11 +1370,15 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
 	 * Big joiner configuration needs DSC for TGL which is not true for
 	 * XE_LPD where uncompressed joiner is supported.
 	 */
-	if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc)
-		return MODE_CLOCK_HIGH;
+	if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc) {
+		*status = MODE_CLOCK_HIGH;
+		return 0;
+	}
 
-	if (mode_rate > max_rate && !dsc)
-		return MODE_CLOCK_HIGH;
+	if (mode_rate > max_rate && !dsc) {
+		*status = MODE_CLOCK_HIGH;
+		return 0;
+	}
 
 	*status = intel_mode_valid_max_plane_size(dev_priv, mode, false);
 	return 0;
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 7958d0bd851e..ef57dad1a9cb 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -4537,7 +4537,7 @@ void intel_shared_dpll_state_verify(struct intel_atomic_state *state,
 				"pll active mismatch (didn't expect pipe %c in active mask (0x%x))\n",
 				pipe_name(crtc->pipe), pll->active_mask);
 		I915_STATE_WARN(i915, pll->state.pipe_mask & pipe_mask,
-				"pll enabled crtcs mismatch (found %x in enabled mask (0x%x))\n",
+				"pll enabled crtcs mismatch (found pipe %c in enabled mask (0x%x))\n",
 				pipe_name(crtc->pipe), pll->state.pipe_mask);
 	}
 }
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index 9598d50f68f2..482c28b5c2de 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -341,7 +341,7 @@ static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state)
 }
 
 static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
-			      unsigned int dewake_scanline)
+			      int dewake_scanline)
 {
 	struct intel_crtc *crtc = dsb->crtc;
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 275d0218394c..a5d7fc8418c9 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -922,7 +922,7 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
 		gpiod_add_lookup_table(gpiod_lookup_table);
 
 	if (want_panel_gpio) {
-		intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", flags);
+		intel_dsi->gpio_panel = devm_gpiod_get(dev->dev, "panel", flags);
 		if (IS_ERR(intel_dsi->gpio_panel)) {
 			drm_err(&dev_priv->drm,
 				"Failed to own gpio for panel control\n");
@@ -932,7 +932,7 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
 
 	if (want_backlight_gpio) {
 		intel_dsi->gpio_backlight =
-			gpiod_get(dev->dev, "backlight", flags);
+			devm_gpiod_get(dev->dev, "backlight", flags);
 		if (IS_ERR(intel_dsi->gpio_backlight)) {
 			drm_err(&dev_priv->drm,
 				"Failed to own gpio for backlight control\n");
@@ -943,16 +943,3 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on)
 	if (gpiod_lookup_table)
 		gpiod_remove_lookup_table(gpiod_lookup_table);
 }
-
-void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi)
-{
-	if (intel_dsi->gpio_panel) {
-		gpiod_put(intel_dsi->gpio_panel);
-		intel_dsi->gpio_panel = NULL;
-	}
-
-	if (intel_dsi->gpio_backlight) {
-		gpiod_put(intel_dsi->gpio_backlight);
-		intel_dsi->gpio_backlight = NULL;
-	}
-}
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.h b/drivers/gpu/drm/i915/display/intel_dsi_vbt.h
index 468d873fab1a..3462fcc760e6 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.h
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.h
@@ -13,7 +13,6 @@ struct intel_dsi;
 
 bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id);
 void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on);
-void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi);
 void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi,
 				 enum mipi_seq seq_id);
 void intel_dsi_log_params(struct intel_dsi *intel_dsi);
diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c
index 55d6743374bd..9111e9d46486 100644
--- a/drivers/gpu/drm/i915/display/intel_dvo.c
+++ b/drivers/gpu/drm/i915/display/intel_dvo.c
@@ -217,11 +217,17 @@ intel_dvo_mode_valid(struct drm_connector *_connector,
 		     struct drm_display_mode *mode)
 {
 	struct intel_connector *connector = to_intel_connector(_connector);
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	struct intel_dvo *intel_dvo = intel_attached_dvo(connector);
 	const struct drm_display_mode *fixed_mode =
 		intel_panel_fixed_mode(connector, mode);
 	int max_dotclk = to_i915(connector->base.dev)->max_dotclk_freq;
 	int target_clock = mode->clock;
+	enum drm_mode_status status;
+
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
 
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index 6d48aa3af95a..0c0144eaa8fa 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -4,7 +4,6 @@
  */
 
 #include <drm/drm_blend.h>
-#include <drm/drm_framebuffer.h>
 #include <drm/drm_modeset_helper.h>
 
 #include <linux/dma-fence.h>
@@ -15,6 +14,7 @@
 #include "intel_display_types.h"
 #include "intel_dpt.h"
 #include "intel_fb.h"
+#include "intel_fb_bo.h"
 #include "intel_frontbuffer.h"
 
 #define check_array_bounds(i915, a, i) drm_WARN_ON(&(i915)->drm, (i) >= ARRAY_SIZE(a))
@@ -301,6 +301,33 @@ lookup_format_info(const struct drm_format_info formats[],
 	return NULL;
 }
 
+unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier)
+{
+	const struct intel_modifier_desc *md;
+	u8 tiling_caps;
+
+	md = lookup_modifier_or_null(fb_modifier);
+	if (!md)
+		return I915_TILING_NONE;
+
+	tiling_caps = lookup_modifier_or_null(fb_modifier)->plane_caps &
+			 INTEL_PLANE_CAP_TILING_MASK;
+
+	switch (tiling_caps) {
+	case INTEL_PLANE_CAP_TILING_Y:
+		return I915_TILING_Y;
+	case INTEL_PLANE_CAP_TILING_X:
+		return I915_TILING_X;
+	case INTEL_PLANE_CAP_TILING_4:
+	case INTEL_PLANE_CAP_TILING_Yf:
+	case INTEL_PLANE_CAP_TILING_NONE:
+		return I915_TILING_NONE;
+	default:
+		MISSING_CASE(tiling_caps);
+		return I915_TILING_NONE;
+	}
+}
+
 /**
  * intel_fb_get_format_info: Get a modifier specific format information
  * @cmd: FB add command structure
@@ -737,26 +764,6 @@ intel_fb_align_height(const struct drm_framebuffer *fb,
 	return ALIGN(height, tile_height);
 }
 
-static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier)
-{
-	u8 tiling_caps = lookup_modifier(fb_modifier)->plane_caps &
-			 INTEL_PLANE_CAP_TILING_MASK;
-
-	switch (tiling_caps) {
-	case INTEL_PLANE_CAP_TILING_Y:
-		return I915_TILING_Y;
-	case INTEL_PLANE_CAP_TILING_X:
-		return I915_TILING_X;
-	case INTEL_PLANE_CAP_TILING_4:
-	case INTEL_PLANE_CAP_TILING_Yf:
-	case INTEL_PLANE_CAP_TILING_NONE:
-		return I915_TILING_NONE;
-	default:
-		MISSING_CASE(tiling_caps);
-		return I915_TILING_NONE;
-	}
-}
-
 bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier)
 {
 	return HAS_DPT(i915) && modifier != DRM_FORMAT_MOD_LINEAR;
@@ -764,7 +771,7 @@ bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier)
 
 bool intel_fb_uses_dpt(const struct drm_framebuffer *fb)
 {
-	return fb && to_i915(fb->dev)->display.params.enable_dpt &&
+	return to_i915(fb->dev)->display.params.enable_dpt &&
 		intel_fb_modifier_uses_dpt(to_i915(fb->dev), fb->modifier);
 }
 
@@ -1374,7 +1381,8 @@ plane_view_scanout_stride(const struct intel_framebuffer *fb, int color_plane,
 	struct drm_i915_private *i915 = to_i915(fb->base.dev);
 	unsigned int stride_tiles;
 
-	if (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14)
+	if ((IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) &&
+	    src_stride_tiles < dst_stride_tiles)
 		stride_tiles = src_stride_tiles;
 	else
 		stride_tiles = dst_stride_tiles;
@@ -1501,8 +1509,20 @@ static u32 calc_plane_remap_info(const struct intel_framebuffer *fb, int color_p
 
 			size += remap_info->size;
 		} else {
-			unsigned int dst_stride = plane_view_dst_stride_tiles(fb, color_plane,
-									      remap_info->width);
+			unsigned int dst_stride;
+
+			/*
+			 * The hardware automagically calculates the CCS AUX surface
+			 * stride from the main surface stride so can't really remap a
+			 * smaller subset (unless we'd remap in whole AUX page units).
+			 */
+			if (intel_fb_needs_pot_stride_remap(fb) &&
+			    intel_fb_is_ccs_modifier(fb->base.modifier))
+				dst_stride = remap_info->src_stride;
+			else
+				dst_stride = remap_info->width;
+
+			dst_stride = plane_view_dst_stride_tiles(fb, color_plane, dst_stride);
 
 			assign_chk_ovf(i915, remap_info->dst_stride, dst_stride);
 			color_plane_info->mapping_stride = dst_stride *
@@ -1657,10 +1677,10 @@ int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer *
 		max_size = max(max_size, offset + size);
 	}
 
-	if (mul_u32_u32(max_size, tile_size) > obj->base.size) {
+	if (mul_u32_u32(max_size, tile_size) > intel_bo_to_drm_bo(obj)->size) {
 		drm_dbg_kms(&i915->drm,
 			    "fb too big for bo (need %llu bytes, have %zu bytes)\n",
-			    mul_u32_u32(max_size, tile_size), obj->base.size);
+			    mul_u32_u32(max_size, tile_size), intel_bo_to_drm_bo(obj)->size);
 		return -EINVAL;
 	}
 
@@ -1881,6 +1901,8 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
 
 	intel_frontbuffer_put(intel_fb->frontbuffer);
 
+	intel_fb_bo_framebuffer_fini(intel_fb_obj(fb));
+
 	kfree(intel_fb);
 }
 
@@ -1889,7 +1911,7 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb,
 						unsigned int *handle)
 {
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct drm_i915_private *i915 = to_i915(intel_bo_to_drm_bo(obj)->dev);
 
 	if (i915_gem_object_is_userptr(obj)) {
 		drm_dbg(&i915->drm,
@@ -1897,7 +1919,7 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb,
 		return -EINVAL;
 	}
 
-	return drm_gem_handle_create(file, &obj->base, handle);
+	return drm_gem_handle_create(file, intel_bo_to_drm_bo(obj), handle);
 }
 
 struct frontbuffer_fence_cb {
@@ -1975,61 +1997,30 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 			   struct drm_i915_gem_object *obj,
 			   struct drm_mode_fb_cmd2 *mode_cmd)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct drm_i915_private *dev_priv = to_i915(intel_bo_to_drm_bo(obj)->dev);
 	struct drm_framebuffer *fb = &intel_fb->base;
 	u32 max_stride;
-	unsigned int tiling, stride;
 	int ret = -EINVAL;
 	int i;
 
-	intel_fb->frontbuffer = intel_frontbuffer_get(obj);
-	if (!intel_fb->frontbuffer)
-		return -ENOMEM;
-
-	i915_gem_object_lock(obj, NULL);
-	tiling = i915_gem_object_get_tiling(obj);
-	stride = i915_gem_object_get_stride(obj);
-	i915_gem_object_unlock(obj);
+	ret = intel_fb_bo_framebuffer_init(intel_fb, obj, mode_cmd);
+	if (ret)
+		return ret;
 
-	if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
-		/*
-		 * If there's a fence, enforce that
-		 * the fb modifier and tiling mode match.
-		 */
-		if (tiling != I915_TILING_NONE &&
-		    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
-			drm_dbg_kms(&dev_priv->drm,
-				    "tiling_mode doesn't match fb modifier\n");
-			goto err;
-		}
-	} else {
-		if (tiling == I915_TILING_X) {
-			mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
-		} else if (tiling == I915_TILING_Y) {
-			drm_dbg_kms(&dev_priv->drm,
-				    "No Y tiling for legacy addfb\n");
-			goto err;
-		}
+	intel_fb->frontbuffer = intel_frontbuffer_get(obj);
+	if (!intel_fb->frontbuffer) {
+		ret = -ENOMEM;
+		goto err;
 	}
 
+	ret = -EINVAL;
 	if (!drm_any_plane_has_format(&dev_priv->drm,
 				      mode_cmd->pixel_format,
 				      mode_cmd->modifier[0])) {
 		drm_dbg_kms(&dev_priv->drm,
 			    "unsupported pixel format %p4cc / modifier 0x%llx\n",
 			    &mode_cmd->pixel_format, mode_cmd->modifier[0]);
-		goto err;
-	}
-
-	/*
-	 * gen2/3 display engine uses the fence if present,
-	 * so the tiling mode must match the fb modifier exactly.
-	 */
-	if (DISPLAY_VER(dev_priv) < 4 &&
-	    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
-		drm_dbg_kms(&dev_priv->drm,
-			    "tiling_mode must match fb modifier exactly on gen2/3\n");
-		goto err;
+		goto err_frontbuffer_put;
 	}
 
 	max_stride = intel_fb_max_stride(dev_priv, mode_cmd->pixel_format,
@@ -2040,18 +2031,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 			    mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ?
 			    "tiled" : "linear",
 			    mode_cmd->pitches[0], max_stride);
-		goto err;
-	}
-
-	/*
-	 * If there's a fence, enforce that
-	 * the fb pitch and fence stride match.
-	 */
-	if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) {
-		drm_dbg_kms(&dev_priv->drm,
-			    "pitch (%d) must match tiling stride (%d)\n",
-			    mode_cmd->pitches[0], stride);
-		goto err;
+		goto err_frontbuffer_put;
 	}
 
 	/* FIXME need to adjust LINOFF/TILEOFF accordingly. */
@@ -2059,7 +2039,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		drm_dbg_kms(&dev_priv->drm,
 			    "plane 0 offset (0x%08x) must be 0\n",
 			    mode_cmd->offsets[0]);
-		goto err;
+		goto err_frontbuffer_put;
 	}
 
 	drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd);
@@ -2070,7 +2050,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		if (mode_cmd->handles[i] != mode_cmd->handles[0]) {
 			drm_dbg_kms(&dev_priv->drm, "bad plane %d handle\n",
 				    i);
-			goto err;
+			goto err_frontbuffer_put;
 		}
 
 		stride_alignment = intel_fb_stride_alignment(fb, i);
@@ -2078,7 +2058,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 			drm_dbg_kms(&dev_priv->drm,
 				    "plane %d pitch (%d) must be at least %u byte aligned\n",
 				    i, fb->pitches[i], stride_alignment);
-			goto err;
+			goto err_frontbuffer_put;
 		}
 
 		if (intel_fb_is_gen12_ccs_aux_plane(fb, i)) {
@@ -2089,7 +2069,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 					    "ccs aux plane %d pitch (%d) must be %d\n",
 					    i,
 					    fb->pitches[i], ccs_aux_stride);
-				goto err;
+				goto err_frontbuffer_put;
 			}
 		}
 
@@ -2098,7 +2078,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 
 	ret = intel_fill_fb_info(dev_priv, intel_fb);
 	if (ret)
-		goto err;
+		goto err_frontbuffer_put;
 
 	if (intel_fb_uses_dpt(fb)) {
 		struct i915_address_space *vm;
@@ -2107,7 +2087,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		if (IS_ERR(vm)) {
 			drm_dbg_kms(&dev_priv->drm, "failed to create DPT\n");
 			ret = PTR_ERR(vm);
-			goto err;
+			goto err_frontbuffer_put;
 		}
 
 		intel_fb->dpt_vm = vm;
@@ -2124,8 +2104,10 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 err_free_dpt:
 	if (intel_fb_uses_dpt(fb))
 		intel_dpt_destroy(intel_fb->dpt_vm);
-err:
+err_frontbuffer_put:
 	intel_frontbuffer_put(intel_fb->frontbuffer);
+err:
+	intel_fb_bo_framebuffer_fini(obj);
 	return ret;
 }
 
@@ -2137,23 +2119,14 @@ intel_user_framebuffer_create(struct drm_device *dev,
 	struct drm_framebuffer *fb;
 	struct drm_i915_gem_object *obj;
 	struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd;
-	struct drm_i915_private *i915;
-
-	obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]);
-	if (!obj)
-		return ERR_PTR(-ENOENT);
-
-	/* object is backed with LMEM for discrete */
-	i915 = to_i915(obj->base.dev);
-	if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM_0)) {
-		/* object is "remote", not in local memory */
-		i915_gem_object_put(obj);
-		drm_dbg_kms(&i915->drm, "framebuffer must reside in local memory\n");
-		return ERR_PTR(-EREMOTE);
-	}
+	struct drm_i915_private *i915 = to_i915(dev);
+
+	obj = intel_fb_bo_lookup_valid_bo(i915, filp, &mode_cmd);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
 
 	fb = intel_framebuffer_create(obj, &mode_cmd);
-	i915_gem_object_put(obj);
+	drm_gem_object_put(intel_bo_to_drm_bo(obj));
 
 	return fb;
 }
diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h
index e85167d6bc34..23db6628f53e 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.h
+++ b/drivers/gpu/drm/i915/display/intel_fb.h
@@ -95,4 +95,6 @@ intel_user_framebuffer_create(struct drm_device *dev,
 bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier);
 bool intel_fb_uses_dpt(const struct drm_framebuffer *fb);
 
+unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier);
+
 #endif /* __INTEL_FB_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.c b/drivers/gpu/drm/i915/display/intel_fb_bo.c
new file mode 100644
index 000000000000..4be09541e509
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_fb_bo.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/drm_framebuffer.h>
+
+#include "gem/i915_gem_object.h"
+
+#include "i915_drv.h"
+#include "intel_fb.h"
+#include "intel_fb_bo.h"
+
+void intel_fb_bo_framebuffer_fini(struct drm_i915_gem_object *obj)
+{
+	/* Nothing to do for i915 */
+}
+
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+				 struct drm_i915_gem_object *obj,
+				 struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned int tiling, stride;
+
+	i915_gem_object_lock(obj, NULL);
+	tiling = i915_gem_object_get_tiling(obj);
+	stride = i915_gem_object_get_stride(obj);
+	i915_gem_object_unlock(obj);
+
+	if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
+		/*
+		 * If there's a fence, enforce that
+		 * the fb modifier and tiling mode match.
+		 */
+		if (tiling != I915_TILING_NONE &&
+		    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
+			drm_dbg_kms(&i915->drm,
+				    "tiling_mode doesn't match fb modifier\n");
+			return -EINVAL;
+		}
+	} else {
+		if (tiling == I915_TILING_X) {
+			mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
+		} else if (tiling == I915_TILING_Y) {
+			drm_dbg_kms(&i915->drm,
+				    "No Y tiling for legacy addfb\n");
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * gen2/3 display engine uses the fence if present,
+	 * so the tiling mode must match the fb modifier exactly.
+	 */
+	if (DISPLAY_VER(i915) < 4 &&
+	    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
+		drm_dbg_kms(&i915->drm,
+			    "tiling_mode must match fb modifier exactly on gen2/3\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * If there's a fence, enforce that
+	 * the fb pitch and fence stride match.
+	 */
+	if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) {
+		drm_dbg_kms(&i915->drm,
+			    "pitch (%d) must match tiling stride (%d)\n",
+			    mode_cmd->pitches[0], stride);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+struct drm_i915_gem_object *
+intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+			    struct drm_file *filp,
+			    const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_i915_gem_object *obj;
+
+	obj = i915_gem_object_lookup(filp, mode_cmd->handles[0]);
+	if (!obj)
+		return ERR_PTR(-ENOENT);
+
+	/* object is backed with LMEM for discrete */
+	if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM_0)) {
+		/* object is "remote", not in local memory */
+		i915_gem_object_put(obj);
+		drm_dbg_kms(&i915->drm, "framebuffer must reside in local memory\n");
+		return ERR_PTR(-EREMOTE);
+	}
+
+	return obj;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.h b/drivers/gpu/drm/i915/display/intel_fb_bo.h
new file mode 100644
index 000000000000..232bf898b013
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_fb_bo.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __INTEL_FB_BO_H__
+#define __INTEL_FB_BO_H__
+
+struct drm_file;
+struct drm_mode_fb_cmd2;
+struct drm_i915_gem_object;
+struct drm_i915_private;
+struct intel_framebuffer;
+
+void intel_fb_bo_framebuffer_fini(struct drm_i915_gem_object *obj);
+
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+				 struct drm_i915_gem_object *obj,
+				 struct drm_mode_fb_cmd2 *mode_cmd);
+
+struct drm_i915_gem_object *
+intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+			    struct drm_file *filp,
+			    const struct drm_mode_fb_cmd2 *user_mode_cmd);
+
+#endif
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 63f389a1707d..f17a1afb4929 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1235,7 +1235,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state,
 	 * Recommendation is to keep this combination disabled
 	 * Bspec: 50422 HSD: 14010260002
 	 */
-	if (DISPLAY_VER(i915) >= 12 && crtc_state->has_psr2) {
+	if (IS_DISPLAY_VER(i915, 12, 14) && crtc_state->has_psr2) {
 		plane_state->no_fbc_reason = "PSR2 enabled";
 		return 0;
 	}
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 31d0d695d567..99894a855ef0 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -43,7 +43,6 @@
 #include <drm/drm_fourcc.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 
-#include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_mman.h"
 
 #include "i915_drv.h"
@@ -51,6 +50,7 @@
 #include "intel_fb.h"
 #include "intel_fb_pin.h"
 #include "intel_fbdev.h"
+#include "intel_fbdev_fb.h"
 #include "intel_frontbuffer.h"
 
 struct intel_fbdev {
@@ -146,65 +146,6 @@ static const struct fb_ops intelfb_ops = {
 	.fb_mmap = intel_fbdev_mmap,
 };
 
-static int intelfb_alloc(struct drm_fb_helper *helper,
-			 struct drm_fb_helper_surface_size *sizes)
-{
-	struct intel_fbdev *ifbdev = to_intel_fbdev(helper);
-	struct drm_framebuffer *fb;
-	struct drm_device *dev = helper->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_mode_fb_cmd2 mode_cmd = {};
-	struct drm_i915_gem_object *obj;
-	int size;
-
-	/* we don't do packed 24bpp */
-	if (sizes->surface_bpp == 24)
-		sizes->surface_bpp = 32;
-
-	mode_cmd.width = sizes->surface_width;
-	mode_cmd.height = sizes->surface_height;
-
-	mode_cmd.pitches[0] = ALIGN(mode_cmd.width *
-				    DIV_ROUND_UP(sizes->surface_bpp, 8), 64);
-	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
-							  sizes->surface_depth);
-
-	size = mode_cmd.pitches[0] * mode_cmd.height;
-	size = PAGE_ALIGN(size);
-
-	obj = ERR_PTR(-ENODEV);
-	if (HAS_LMEM(dev_priv)) {
-		obj = i915_gem_object_create_lmem(dev_priv, size,
-						  I915_BO_ALLOC_CONTIGUOUS |
-						  I915_BO_ALLOC_USER);
-	} else {
-		/*
-		 * If the FB is too big, just don't use it since fbdev is not very
-		 * important and we should probably use that space with FBC or other
-		 * features.
-		 *
-		 * Also skip stolen on MTL as Wa_22018444074 mitigation.
-		 */
-		if (!(IS_METEORLAKE(dev_priv)) && size * 2 < dev_priv->dsm.usable_size)
-			obj = i915_gem_object_create_stolen(dev_priv, size);
-		if (IS_ERR(obj))
-			obj = i915_gem_object_create_shmem(dev_priv, size);
-	}
-
-	if (IS_ERR(obj)) {
-		drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj);
-		return PTR_ERR(obj);
-	}
-
-	fb = intel_framebuffer_create(obj, &mode_cmd);
-	i915_gem_object_put(obj);
-	if (IS_ERR(fb))
-		return PTR_ERR(fb);
-
-	ifbdev->fb = to_intel_framebuffer(fb);
-	return 0;
-}
-
 static int intelfb_create(struct drm_fb_helper *helper,
 			  struct drm_fb_helper_surface_size *sizes)
 {
@@ -213,7 +154,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	struct drm_device *dev = helper->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
-	struct i915_ggtt *ggtt = to_gt(dev_priv)->ggtt;
 	const struct i915_gtt_view view = {
 		.type = I915_GTT_VIEW_NORMAL,
 	};
@@ -222,9 +162,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	struct i915_vma *vma;
 	unsigned long flags = 0;
 	bool prealloc = false;
-	void __iomem *vaddr;
 	struct drm_i915_gem_object *obj;
-	struct i915_gem_ww_ctx ww;
 	int ret;
 
 	mutex_lock(&ifbdev->hpd_lock);
@@ -245,12 +183,13 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		intel_fb = ifbdev->fb = NULL;
 	}
 	if (!intel_fb || drm_WARN_ON(dev, !intel_fb_obj(&intel_fb->base))) {
+		struct drm_framebuffer *fb;
 		drm_dbg_kms(&dev_priv->drm,
 			    "no BIOS fb, allocating a new one\n");
-		ret = intelfb_alloc(helper, sizes);
-		if (ret)
-			return ret;
-		intel_fb = ifbdev->fb;
+		fb = intel_fbdev_fb_alloc(helper, sizes);
+		if (IS_ERR(fb))
+			return PTR_ERR(fb);
+		intel_fb = ifbdev->fb = to_intel_framebuffer(fb);
 	} else {
 		drm_dbg_kms(&dev_priv->drm, "re-using BIOS fb\n");
 		prealloc = true;
@@ -283,49 +222,18 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	info->fbops = &intelfb_ops;
 
 	obj = intel_fb_obj(&intel_fb->base);
-	if (i915_gem_object_is_lmem(obj)) {
-		struct intel_memory_region *mem = obj->mm.region;
-
-		/* Use fbdev's framebuffer from lmem for discrete */
-		info->fix.smem_start =
-			(unsigned long)(mem->io_start +
-					i915_gem_object_get_dma_address(obj, 0));
-		info->fix.smem_len = obj->base.size;
-	} else {
-		/* Our framebuffer is the entirety of fbdev's system memory */
-		info->fix.smem_start =
-			(unsigned long)(ggtt->gmadr.start + i915_ggtt_offset(vma));
-		info->fix.smem_len = vma->size;
-	}
-
-	for_i915_gem_ww(&ww, ret, false) {
-		ret = i915_gem_object_lock(vma->obj, &ww);
-
-		if (ret)
-			continue;
-
-		vaddr = i915_vma_pin_iomap(vma);
-		if (IS_ERR(vaddr)) {
-			drm_err(&dev_priv->drm,
-				"Failed to remap framebuffer into virtual memory (%pe)\n", vaddr);
-			ret = PTR_ERR(vaddr);
-			continue;
-		}
-	}
 
+	ret = intel_fbdev_fb_fill_info(dev_priv, info, obj, vma);
 	if (ret)
 		goto out_unpin;
 
-	info->screen_base = vaddr;
-	info->screen_size = vma->size;
-
 	drm_fb_helper_fill_info(info, &ifbdev->helper, sizes);
 
 	/* If the object is shmemfs backed, it will have given us zeroed pages.
 	 * If the object is stolen however, it will be full of whatever
 	 * garbage was left in there.
 	 */
-	if (!i915_gem_object_is_shmem(vma->obj) && !prealloc)
+	if (!i915_gem_object_is_shmem(obj) && !prealloc)
 		memset_io(info->screen_base, 0, info->screen_size);
 
 	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
@@ -424,12 +332,12 @@ static bool intel_fbdev_init_bios(struct drm_device *dev,
 			continue;
 		}
 
-		if (obj->base.size > max_size) {
+		if (intel_bo_to_drm_bo(obj)->size > max_size) {
 			drm_dbg_kms(&i915->drm,
 				    "found possible fb from [PLANE:%d:%s]\n",
 				    plane->base.base.id, plane->base.name);
 			fb = to_intel_framebuffer(plane_state->uapi.fb);
-			max_size = obj->base.size;
+			max_size = intel_bo_to_drm_bo(obj)->size;
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c
new file mode 100644
index 000000000000..717c3a3237c4
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_fb_helper.h>
+
+#include "gem/i915_gem_lmem.h"
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+#include "intel_fbdev_fb.h"
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+					     struct drm_fb_helper_surface_size *sizes)
+{
+	struct drm_framebuffer *fb;
+	struct drm_device *dev = helper->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_mode_fb_cmd2 mode_cmd = {};
+	struct drm_i915_gem_object *obj;
+	int size;
+
+	/* we don't do packed 24bpp */
+	if (sizes->surface_bpp == 24)
+		sizes->surface_bpp = 32;
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+
+	mode_cmd.pitches[0] = ALIGN(mode_cmd.width *
+				    DIV_ROUND_UP(sizes->surface_bpp, 8), 64);
+	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
+							  sizes->surface_depth);
+
+	size = mode_cmd.pitches[0] * mode_cmd.height;
+	size = PAGE_ALIGN(size);
+
+	obj = ERR_PTR(-ENODEV);
+	if (HAS_LMEM(dev_priv)) {
+		obj = i915_gem_object_create_lmem(dev_priv, size,
+						  I915_BO_ALLOC_CONTIGUOUS |
+						  I915_BO_ALLOC_USER);
+	} else {
+		/*
+		 * If the FB is too big, just don't use it since fbdev is not very
+		 * important and we should probably use that space with FBC or other
+		 * features.
+		 *
+		 * Also skip stolen on MTL as Wa_22018444074 mitigation.
+		 */
+		if (!(IS_METEORLAKE(dev_priv)) && size * 2 < dev_priv->dsm.usable_size)
+			obj = i915_gem_object_create_stolen(dev_priv, size);
+		if (IS_ERR(obj))
+			obj = i915_gem_object_create_shmem(dev_priv, size);
+	}
+
+	if (IS_ERR(obj)) {
+		drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fb = intel_framebuffer_create(obj, &mode_cmd);
+	i915_gem_object_put(obj);
+
+	return fb;
+}
+
+int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
+			     struct drm_i915_gem_object *obj, struct i915_vma *vma)
+{
+	struct i915_gem_ww_ctx ww;
+	void __iomem *vaddr;
+	int ret;
+
+	if (i915_gem_object_is_lmem(obj)) {
+		struct intel_memory_region *mem = obj->mm.region;
+
+		/* Use fbdev's framebuffer from lmem for discrete */
+		info->fix.smem_start =
+			(unsigned long)(mem->io_start +
+					i915_gem_object_get_dma_address(obj, 0));
+		info->fix.smem_len = obj->base.size;
+	} else {
+		struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+
+		/* Our framebuffer is the entirety of fbdev's system memory */
+		info->fix.smem_start =
+			(unsigned long)(ggtt->gmadr.start + i915_ggtt_offset(vma));
+		info->fix.smem_len = vma->size;
+	}
+
+	for_i915_gem_ww(&ww, ret, false) {
+		ret = i915_gem_object_lock(vma->obj, &ww);
+
+		if (ret)
+			continue;
+
+		vaddr = i915_vma_pin_iomap(vma);
+		if (IS_ERR(vaddr)) {
+			drm_err(&i915->drm,
+				"Failed to remap framebuffer into virtual memory (%pe)\n", vaddr);
+			ret = PTR_ERR(vaddr);
+			continue;
+		}
+	}
+
+	if (ret)
+		return ret;
+
+	info->screen_base = vaddr;
+	info->screen_size = intel_bo_to_drm_bo(obj)->size;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h
new file mode 100644
index 000000000000..a395b2c65d33
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_FBDEV_FB_H__
+#define __INTEL_FBDEV_FB_H__
+
+struct drm_fb_helper;
+struct drm_fb_helper_surface_size;
+struct drm_i915_gem_object;
+struct drm_i915_private;
+struct fb_info;
+struct i915_vma;
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+					     struct drm_fb_helper_surface_size *sizes);
+int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
+			     struct drm_i915_gem_object *obj, struct i915_vma *vma);
+
+#endif
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index ab18cfc19c0a..39e4f5f7c817 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -1983,6 +1983,10 @@ intel_hdmi_mode_valid(struct drm_connector *connector,
 	bool ycbcr_420_only;
 	enum intel_output_format sink_format;
 
+	status = intel_cpu_transcoder_mode_valid(dev_priv, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING)
 		clock *= 2;
 
diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c
index 4b114fde57b1..221f5c6c871b 100644
--- a/drivers/gpu/drm/i915/display/intel_lvds.c
+++ b/drivers/gpu/drm/i915/display/intel_lvds.c
@@ -185,7 +185,7 @@ static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv,
 	/* Convert from 100ms to 100us units */
 	pps->t4 = val * 1000;
 
-	if (DISPLAY_VER(dev_priv) <= 4 &&
+	if (DISPLAY_VER(dev_priv) < 5 &&
 	    pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) {
 		drm_dbg_kms(&dev_priv->drm,
 			    "Panel power timings uninitialized, "
@@ -389,11 +389,16 @@ intel_lvds_mode_valid(struct drm_connector *_connector,
 		      struct drm_display_mode *mode)
 {
 	struct intel_connector *connector = to_intel_connector(_connector);
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	const struct drm_display_mode *fixed_mode =
 		intel_panel_fixed_mode(connector, mode);
 	int max_pixclk = to_i915(connector->base.dev)->max_dotclk_freq;
 	enum drm_mode_status status;
 
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
 
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 15c1804dcd59..8f702c3fc62d 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -806,10 +806,10 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
 
 	val |= EDP_PSR2_IDLE_FRAMES(psr_compute_idle_frames(intel_dp));
 
-	if (DISPLAY_VER(dev_priv) <= 13 && !IS_ALDERLAKE_P(dev_priv))
+	if (DISPLAY_VER(dev_priv) < 14 && !IS_ALDERLAKE_P(dev_priv))
 		val |= EDP_SU_TRACK_ENABLE;
 
-	if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) <= 12)
+	if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) < 13)
 		val |= EDP_Y_COORDINATE_ENABLE;
 
 	val |= EDP_PSR2_FRAME_BEFORE_SU(frames_before_su_entry(intel_dp));
@@ -891,13 +891,13 @@ transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder cpu_trans
 		return false;
 }
 
-static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate)
+static u32 intel_get_frame_time_us(const struct intel_crtc_state *crtc_state)
 {
-	if (!cstate || !cstate->hw.active)
+	if (!crtc_state->hw.active)
 		return 0;
 
 	return DIV_ROUND_UP(1000 * 1000,
-			    drm_mode_vrefresh(&cstate->hw.adjusted_mode));
+			    drm_mode_vrefresh(&crtc_state->hw.adjusted_mode));
 }
 
 static void psr2_program_idle_frames(struct intel_dp *intel_dp,
@@ -1094,7 +1094,7 @@ static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_d
 		return true;
 
 	/* Not supported <13 / Wa_22012279113:adl-p */
-	if (DISPLAY_VER(dev_priv) <= 13 || intel_dp->edp_dpcd[0] < DP_EDP_14b)
+	if (DISPLAY_VER(dev_priv) < 14 || intel_dp->edp_dpcd[0] < DP_EDP_14b)
 		return false;
 
 	crtc_state->req_psr2_sdp_prior_scanline = true;
@@ -1221,7 +1221,7 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
 	 * over PSR2.
 	 */
 	if (crtc_state->dsc.compression_enable &&
-	    (DISPLAY_VER(dev_priv) <= 13 && !IS_ALDERLAKE_P(dev_priv))) {
+	    (DISPLAY_VER(dev_priv) < 14 && !IS_ALDERLAKE_P(dev_priv))) {
 		drm_dbg_kms(&dev_priv->drm,
 			    "PSR2 cannot be enabled since DSC is enabled\n");
 		return false;
@@ -1917,81 +1917,6 @@ static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp)
 	intel_de_write(dev_priv, CURSURFLIVE(intel_dp->psr.pipe), 0);
 }
 
-void intel_psr2_disable_plane_sel_fetch_arm(struct intel_plane *plane,
-					    const struct intel_crtc_state *crtc_state)
-{
-	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	enum pipe pipe = plane->pipe;
-
-	if (!crtc_state->enable_psr2_sel_fetch)
-		return;
-
-	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0);
-}
-
-void intel_psr2_program_plane_sel_fetch_arm(struct intel_plane *plane,
-					    const struct intel_crtc_state *crtc_state,
-					    const struct intel_plane_state *plane_state)
-{
-	struct drm_i915_private *i915 = to_i915(plane->base.dev);
-	enum pipe pipe = plane->pipe;
-
-	if (!crtc_state->enable_psr2_sel_fetch)
-		return;
-
-	if (plane->id == PLANE_CURSOR)
-		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
-				  plane_state->ctl);
-	else
-		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
-				  PLANE_SEL_FETCH_CTL_ENABLE);
-}
-
-void intel_psr2_program_plane_sel_fetch_noarm(struct intel_plane *plane,
-					      const struct intel_crtc_state *crtc_state,
-					      const struct intel_plane_state *plane_state,
-					      int color_plane)
-{
-	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	enum pipe pipe = plane->pipe;
-	const struct drm_rect *clip;
-	u32 val;
-	int x, y;
-
-	if (!crtc_state->enable_psr2_sel_fetch)
-		return;
-
-	if (plane->id == PLANE_CURSOR)
-		return;
-
-	clip = &plane_state->psr2_sel_fetch_area;
-
-	val = (clip->y1 + plane_state->uapi.dst.y1) << 16;
-	val |= plane_state->uapi.dst.x1;
-	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_POS(pipe, plane->id), val);
-
-	x = plane_state->view.color_plane[color_plane].x;
-
-	/*
-	 * From Bspec: UV surface Start Y Position = half of Y plane Y
-	 * start position.
-	 */
-	if (!color_plane)
-		y = plane_state->view.color_plane[color_plane].y + clip->y1;
-	else
-		y = plane_state->view.color_plane[color_plane].y + clip->y1 / 2;
-
-	val = y << 16 | x;
-
-	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_OFFSET(pipe, plane->id),
-			  val);
-
-	/* Sizes are 0 based */
-	val = (drm_rect_height(clip) - 1) << 16;
-	val |= (drm_rect_width(&plane_state->uapi.src) >> 16) - 1;
-	intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_SIZE(pipe, plane->id), val);
-}
-
 void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
@@ -2251,8 +2176,19 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
 			continue;
 
 		inter = pipe_clip;
-		if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst))
+		sel_fetch_area = &new_plane_state->psr2_sel_fetch_area;
+		if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst)) {
+			sel_fetch_area->y1 = -1;
+			sel_fetch_area->y2 = -1;
+			/*
+			 * if plane sel fetch was previously enabled ->
+			 * disable it
+			 */
+			if (drm_rect_height(&old_plane_state->psr2_sel_fetch_area) > 0)
+				crtc_state->update_planes |= BIT(plane->id);
+
 			continue;
+		}
 
 		if (!psr2_sel_fetch_plane_state_supported(new_plane_state)) {
 			full_update = true;
@@ -3383,11 +3319,11 @@ void intel_psr_connector_debugfs_add(struct intel_connector *connector)
 	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 	struct dentry *root = connector->base.debugfs_entry;
 
-	if (connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) {
-		if (!(HAS_DP20(i915) &&
-		      connector->base.connector_type == DRM_MODE_CONNECTOR_DisplayPort))
-			return;
-	}
+	/* TODO: Add support for MST connectors as well. */
+	if ((connector->base.connector_type != DRM_MODE_CONNECTOR_eDP &&
+	     connector->base.connector_type != DRM_MODE_CONNECTOR_DisplayPort) ||
+	    connector->mst_port)
+		return;
 
 	debugfs_create_file("i915_psr_sink_status", 0444, root,
 			    connector, &i915_psr_sink_status_fops);
diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
index 6a1f4573852b..143e0595c097 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.h
+++ b/drivers/gpu/drm/i915/display/intel_psr.h
@@ -55,16 +55,6 @@ bool intel_psr_enabled(struct intel_dp *intel_dp);
 int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
 				struct intel_crtc *crtc);
 void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state);
-void intel_psr2_program_plane_sel_fetch_noarm(struct intel_plane *plane,
-					      const struct intel_crtc_state *crtc_state,
-					      const struct intel_plane_state *plane_state,
-					      int color_plane);
-void intel_psr2_program_plane_sel_fetch_arm(struct intel_plane *plane,
-					    const struct intel_crtc_state *crtc_state,
-					    const struct intel_plane_state *plane_state);
-
-void intel_psr2_disable_plane_sel_fetch_arm(struct intel_plane *plane,
-					    const struct intel_crtc_state *crtc_state);
 void intel_psr_pause(struct intel_dp *intel_dp);
 void intel_psr_resume(struct intel_dp *intel_dp);
 
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index bcb4959df70d..9218047495fb 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -1931,13 +1931,19 @@ static enum drm_mode_status
 intel_sdvo_mode_valid(struct drm_connector *connector,
 		      struct drm_display_mode *mode)
 {
+	struct drm_i915_private *i915 = to_i915(connector->dev);
 	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector));
 	struct intel_sdvo_connector *intel_sdvo_connector =
 		to_intel_sdvo_connector(connector);
-	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
 	bool has_hdmi_sink = intel_has_hdmi_sink(intel_sdvo_connector, connector->state);
+	int max_dotclk = i915->max_dotclk_freq;
+	enum drm_mode_status status;
 	int clock = mode->clock;
 
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
 
diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c
index ce5a73a4cc89..bc61e736f9b3 100644
--- a/drivers/gpu/drm/i915/display/intel_snps_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c
@@ -3,7 +3,7 @@
  * Copyright © 2019 Intel Corporation
  */
 
-#include <linux/util_macros.h>
+#include <linux/math.h>
 
 #include "i915_reg.h"
 #include "intel_ddi.h"
diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c
index f64d348a969e..dcf05e00e505 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.c
+++ b/drivers/gpu/drm/i915/display/intel_tc.c
@@ -1030,18 +1030,25 @@ static bool xelpdp_tc_phy_enable_tcss_power(struct intel_tc_port *tc, bool enabl
 
 	__xelpdp_tc_phy_enable_tcss_power(tc, enable);
 
-	if ((!tc_phy_wait_for_ready(tc) ||
-	     !xelpdp_tc_phy_wait_for_tcss_power(tc, enable)) &&
-	    !drm_WARN_ON(&i915->drm, tc->mode == TC_PORT_LEGACY)) {
-		if (enable) {
-			__xelpdp_tc_phy_enable_tcss_power(tc, false);
-			xelpdp_tc_phy_wait_for_tcss_power(tc, false);
-		}
+	if (enable && !tc_phy_wait_for_ready(tc))
+		goto out_disable;
 
-		return false;
-	}
+	if (!xelpdp_tc_phy_wait_for_tcss_power(tc, enable))
+		goto out_disable;
 
 	return true;
+
+out_disable:
+	if (drm_WARN_ON(&i915->drm, tc->mode == TC_PORT_LEGACY))
+		return false;
+
+	if (!enable)
+		return false;
+
+	__xelpdp_tc_phy_enable_tcss_power(tc, false);
+	xelpdp_tc_phy_wait_for_tcss_power(tc, false);
+
+	return false;
 }
 
 static void xelpdp_tc_phy_take_ownership(struct intel_tc_port *tc, bool take)
diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c
index 31a79fdfc812..d4386cb3569e 100644
--- a/drivers/gpu/drm/i915/display/intel_tv.c
+++ b/drivers/gpu/drm/i915/display/intel_tv.c
@@ -958,8 +958,14 @@ static enum drm_mode_status
 intel_tv_mode_valid(struct drm_connector *connector,
 		    struct drm_display_mode *mode)
 {
+	struct drm_i915_private *i915 = to_i915(connector->dev);
 	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
-	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
+	int max_dotclk = i915->max_dotclk_freq;
+	enum drm_mode_status status;
+
+	status = intel_cpu_transcoder_mode_valid(i915, mode);
+	if (status != MODE_OK)
+		return status;
 
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
@@ -1411,9 +1417,6 @@ set_tv_mode_timings(struct drm_i915_private *dev_priv,
 static void set_color_conversion(struct drm_i915_private *dev_priv,
 				 const struct color_conversion *color_conversion)
 {
-	if (!color_conversion)
-		return;
-
 	intel_de_write(dev_priv, TV_CSC_Y,
 		       (color_conversion->ry << 16) | color_conversion->gy);
 	intel_de_write(dev_priv, TV_CSC_Y2,
@@ -1448,9 +1451,6 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state,
 	int xpos, ypos;
 	unsigned int xsize, ysize;
 
-	if (!tv_mode)
-		return;	/* can't happen (mode_prepare prevents this) */
-
 	tv_ctl = intel_de_read(dev_priv, TV_CTL);
 	tv_ctl &= TV_CTL_SAVE;
 
diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c
index 2cec2abf9746..fe256bf7b485 100644
--- a/drivers/gpu/drm/i915/display/intel_vblank.c
+++ b/drivers/gpu/drm/i915/display/intel_vblank.c
@@ -265,6 +265,32 @@ int intel_crtc_scanline_to_hw(struct intel_crtc *crtc, int scanline)
 	return (scanline + vtotal - crtc->scanline_offset) % vtotal;
 }
 
+/*
+ * The uncore version of the spin lock functions is used to decide
+ * whether we need to lock the uncore lock or not.  This is only
+ * needed in i915, not in Xe.
+ *
+ * This lock in i915 is needed because some old platforms (at least
+ * IVB and possibly HSW as well), which are not supported in Xe, need
+ * all register accesses to the same cacheline to be serialized,
+ * otherwise they may hang.
+ */
+static void intel_vblank_section_enter(struct drm_i915_private *i915)
+	__acquires(i915->uncore.lock)
+{
+#ifdef I915
+	spin_lock(&i915->uncore.lock);
+#endif
+}
+
+static void intel_vblank_section_exit(struct drm_i915_private *i915)
+	__releases(i915->uncore.lock)
+{
+#ifdef I915
+	spin_unlock(&i915->uncore.lock);
+#endif
+}
+
 static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
 				     bool in_vblank_irq,
 				     int *vpos, int *hpos,
@@ -302,11 +328,12 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
 	}
 
 	/*
-	 * Lock uncore.lock, as we will do multiple timing critical raw
-	 * register reads, potentially with preemption disabled, so the
-	 * following code must not block on uncore.lock.
+	 * Enter vblank critical section, as we will do multiple
+	 * timing critical raw register reads, potentially with
+	 * preemption disabled, so the following code must not block.
 	 */
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+	local_irq_save(irqflags);
+	intel_vblank_section_enter(dev_priv);
 
 	/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
 
@@ -374,7 +401,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
 
 	/* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
 
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+	intel_vblank_section_exit(dev_priv);
+	local_irq_restore(irqflags);
 
 	/*
 	 * While in vblank, position will be negative
@@ -412,9 +440,13 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc)
 	unsigned long irqflags;
 	int position;
 
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+	local_irq_save(irqflags);
+	intel_vblank_section_enter(dev_priv);
+
 	position = __intel_get_crtc_scanline(crtc);
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+
+	intel_vblank_section_exit(dev_priv);
+	local_irq_restore(irqflags);
 
 	return position;
 }
@@ -537,7 +569,7 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state,
 	 * Need to audit everything to make sure it's safe.
 	 */
 	spin_lock_irqsave(&i915->drm.vblank_time_lock, irqflags);
-	spin_lock(&i915->uncore.lock);
+	intel_vblank_section_enter(i915);
 
 	drm_calc_timestamping_constants(&crtc->base, &adjusted_mode);
 
@@ -546,7 +578,6 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state,
 	crtc->mode_flags = mode_flags;
 
 	crtc->scanline_offset = intel_crtc_scanline_offset(crtc_state);
-
-	spin_unlock(&i915->uncore.lock);
+	intel_vblank_section_exit(i915);
 	spin_unlock_irqrestore(&i915->drm.vblank_time_lock, irqflags);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c
index 5f2fb702e367..17d6572f9d0a 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc.c
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
@@ -812,13 +812,13 @@ void intel_dsc_disable(const struct intel_crtc_state *old_crtc_state)
 }
 
 static u32 intel_dsc_pps_read(struct intel_crtc_state *crtc_state, int pps,
-			      bool *check_equal)
+			      bool *all_equal)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	i915_reg_t dsc_reg[2];
 	int i, vdsc_per_pipe, dsc_reg_num;
-	u32 val = 0;
+	u32 val;
 
 	vdsc_per_pipe = intel_dsc_get_vdsc_per_pipe(crtc_state);
 	dsc_reg_num = min_t(int, ARRAY_SIZE(dsc_reg), vdsc_per_pipe);
@@ -827,20 +827,13 @@ static u32 intel_dsc_pps_read(struct intel_crtc_state *crtc_state, int pps,
 
 	intel_dsc_get_pps_reg(crtc_state, pps, dsc_reg, dsc_reg_num);
 
-	if (check_equal)
-		*check_equal = true;
-
-	for (i = 0; i < dsc_reg_num; i++) {
-		u32 tmp;
+	*all_equal = true;
 
-		tmp = intel_de_read(i915, dsc_reg[i]);
+	val = intel_de_read(i915, dsc_reg[0]);
 
-		if (i == 0) {
-			val = tmp;
-		} else if (check_equal && tmp != val) {
-			*check_equal = false;
-			break;
-		} else if (!check_equal) {
+	for (i = 1; i < dsc_reg_num; i++) {
+		if (intel_de_read(i915, dsc_reg[i]) != val) {
+			*all_equal = false;
 			break;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c
index 1e7c97243fcf..8a934bada624 100644
--- a/drivers/gpu/drm/i915/display/skl_scaler.c
+++ b/drivers/gpu/drm/i915/display/skl_scaler.c
@@ -504,7 +504,6 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 {
 	struct drm_plane *plane = NULL;
 	struct intel_plane *intel_plane;
-	struct intel_plane_state *plane_state = NULL;
 	struct intel_crtc_scaler_state *scaler_state =
 		&crtc_state->scaler_state;
 	struct drm_atomic_state *drm_state = crtc_state->uapi.state;
@@ -536,6 +535,7 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 
 	/* walkthrough scaler_users bits and start assigning scalers */
 	for (i = 0; i < sizeof(scaler_state->scaler_users) * 8; i++) {
+		struct intel_plane_state *plane_state = NULL;
 		int *scaler_id;
 		const char *name;
 		int idx, ret;
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index f5c77a018e10..511dc1544854 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -18,6 +18,7 @@
 #include "intel_fbc.h"
 #include "intel_frontbuffer.h"
 #include "intel_psr.h"
+#include "intel_psr_regs.h"
 #include "skl_scaler.h"
 #include "skl_universal_plane.h"
 #include "skl_watermark.h"
@@ -629,6 +630,18 @@ skl_plane_disable_arm(struct intel_plane *plane,
 	intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), 0);
 }
 
+static void icl_plane_disable_sel_fetch_arm(struct intel_plane *plane,
+					    const struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0);
+}
+
 static void
 icl_plane_disable_arm(struct intel_plane *plane,
 		      const struct intel_crtc_state *crtc_state)
@@ -642,7 +655,7 @@ icl_plane_disable_arm(struct intel_plane *plane,
 
 	skl_write_plane_wm(plane, crtc_state);
 
-	intel_psr2_disable_plane_sel_fetch_arm(plane, crtc_state);
+	icl_plane_disable_sel_fetch_arm(plane, crtc_state);
 	intel_de_write_fw(dev_priv, PLANE_CTL(pipe, plane_id), 0);
 	intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), 0);
 }
@@ -1197,6 +1210,48 @@ skl_plane_update_arm(struct intel_plane *plane,
 			  skl_plane_surf(plane_state, 0));
 }
 
+static void icl_plane_update_sel_fetch_noarm(struct intel_plane *plane,
+					     const struct intel_crtc_state *crtc_state,
+					     const struct intel_plane_state *plane_state,
+					     int color_plane)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+	const struct drm_rect *clip;
+	u32 val;
+	int x, y;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	clip = &plane_state->psr2_sel_fetch_area;
+
+	val = (clip->y1 + plane_state->uapi.dst.y1) << 16;
+	val |= plane_state->uapi.dst.x1;
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_POS(pipe, plane->id), val);
+
+	x = plane_state->view.color_plane[color_plane].x;
+
+	/*
+	 * From Bspec: UV surface Start Y Position = half of Y plane Y
+	 * start position.
+	 */
+	if (!color_plane)
+		y = plane_state->view.color_plane[color_plane].y + clip->y1;
+	else
+		y = plane_state->view.color_plane[color_plane].y + clip->y1 / 2;
+
+	val = y << 16 | x;
+
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_OFFSET(pipe, plane->id),
+			  val);
+
+	/* Sizes are 0 based */
+	val = (drm_rect_height(clip) - 1) << 16;
+	val |= (drm_rect_width(&plane_state->uapi.src) >> 16) - 1;
+	intel_de_write_fw(i915, PLANE_SEL_FETCH_SIZE(pipe, plane->id), val);
+}
+
 static void
 icl_plane_update_noarm(struct intel_plane *plane,
 		       const struct intel_crtc_state *crtc_state,
@@ -1269,7 +1324,24 @@ icl_plane_update_noarm(struct intel_plane *plane,
 	if (plane_state->force_black)
 		icl_plane_csc_load_black(plane);
 
-	intel_psr2_program_plane_sel_fetch_noarm(plane, crtc_state, plane_state, color_plane);
+	icl_plane_update_sel_fetch_noarm(plane, crtc_state, plane_state, color_plane);
+}
+
+static void icl_plane_update_sel_fetch_arm(struct intel_plane *plane,
+					   const struct intel_crtc_state *crtc_state,
+					   const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+	enum pipe pipe = plane->pipe;
+
+	if (!crtc_state->enable_psr2_sel_fetch)
+		return;
+
+	if (drm_rect_height(&plane_state->psr2_sel_fetch_area) > 0)
+		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
+				  PLANE_SEL_FETCH_CTL_ENABLE);
+	else
+		icl_plane_disable_sel_fetch_arm(plane, crtc_state);
 }
 
 static void
@@ -1296,7 +1368,7 @@ icl_plane_update_arm(struct intel_plane *plane,
 	if (plane_state->scaler_id >= 0)
 		skl_program_plane_scaler(plane, crtc_state, plane_state);
 
-	intel_psr2_program_plane_sel_fetch_arm(plane, crtc_state, plane_state);
+	icl_plane_update_sel_fetch_arm(plane, crtc_state, plane_state);
 
 	/*
 	 * The control register self-arms if the plane was previously
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index bda49734ca33..9b33b8a74d64 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -1532,21 +1532,29 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder)
 	}
 }
 
-static void intel_dsi_encoder_destroy(struct drm_encoder *encoder)
+static const struct drm_encoder_funcs intel_dsi_funcs = {
+	.destroy = intel_encoder_destroy,
+};
+
+static enum drm_mode_status vlv_dsi_mode_valid(struct drm_connector *connector,
+					       struct drm_display_mode *mode)
 {
-	struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder));
+	struct drm_i915_private *i915 = to_i915(connector->dev);
 
-	intel_dsi_vbt_gpio_cleanup(intel_dsi);
-	intel_encoder_destroy(encoder);
-}
+	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+		enum drm_mode_status status;
 
-static const struct drm_encoder_funcs intel_dsi_funcs = {
-	.destroy = intel_dsi_encoder_destroy,
-};
+		status = intel_cpu_transcoder_mode_valid(i915, mode);
+		if (status != MODE_OK)
+			return status;
+	}
+
+	return intel_dsi_mode_valid(connector, mode);
+}
 
 static const struct drm_connector_helper_funcs intel_dsi_connector_helper_funcs = {
 	.get_modes = intel_dsi_get_modes,
-	.mode_valid = intel_dsi_mode_valid,
+	.mode_valid = vlv_dsi_mode_valid,
 	.atomic_check = intel_digital_connector_atomic_check,
 };
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e38f06a6e56e..dcbfe32fd30c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -279,7 +279,8 @@ static int proto_context_set_protected(struct drm_i915_private *i915,
 }
 
 static struct i915_gem_proto_context *
-proto_context_create(struct drm_i915_private *i915, unsigned int flags)
+proto_context_create(struct drm_i915_file_private *fpriv,
+		     struct drm_i915_private *i915, unsigned int flags)
 {
 	struct i915_gem_proto_context *pc, *err;
 
@@ -287,6 +288,7 @@ proto_context_create(struct drm_i915_private *i915, unsigned int flags)
 	if (!pc)
 		return ERR_PTR(-ENOMEM);
 
+	pc->fpriv = fpriv;
 	pc->num_user_engines = -1;
 	pc->user_engines = NULL;
 	pc->user_flags = BIT(UCONTEXT_BANNABLE) |
@@ -1622,6 +1624,7 @@ i915_gem_create_context(struct drm_i915_private *i915,
 			err = PTR_ERR(ppgtt);
 			goto err_ctx;
 		}
+		ppgtt->vm.fpriv = pc->fpriv;
 		vm = &ppgtt->vm;
 	}
 	if (vm)
@@ -1741,7 +1744,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	/* 0 reserved for invalid/unassigned ppgtt */
 	xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1);
 
-	pc = proto_context_create(i915, 0);
+	pc = proto_context_create(file_priv, i915, 0);
 	if (IS_ERR(pc)) {
 		err = PTR_ERR(pc);
 		goto err;
@@ -1823,6 +1826,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
 
 	GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */
 	args->vm_id = id;
+	ppgtt->vm.fpriv = file_priv;
 	return 0;
 
 err_put:
@@ -2285,7 +2289,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 		return -EIO;
 	}
 
-	ext_data.pc = proto_context_create(i915, args->flags);
+	ext_data.pc = proto_context_create(file->driver_priv, i915,
+					   args->flags);
 	if (IS_ERR(ext_data.pc))
 		return PTR_ERR(ext_data.pc);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index cb78214a7dcd..03bc7f9d191b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -188,6 +188,9 @@ struct i915_gem_proto_engine {
  * CONTEXT_CREATE_SET_PARAM during GEM_CONTEXT_CREATE.
  */
 struct i915_gem_proto_context {
+	/** @fpriv: Client which creates the context */
+	struct drm_i915_file_private *fpriv;
+
 	/** @vm: See &i915_gem_context.vm */
 	struct i915_address_space *vm;
 
@@ -409,9 +412,9 @@ struct i915_gem_context {
 
 	/** @stale: tracks stale engines to be destroyed */
 	struct {
-		/** @lock: guards engines */
+		/** @stale.lock: guards engines */
 		spinlock_t lock;
-		/** @engines: list of stale engines */
+		/** @stale.engines: list of stale engines */
 		struct list_head engines;
 	} stale;
 };
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index ccc077b74d2d..555022c0652c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -254,6 +254,8 @@ struct i915_execbuffer {
 	struct intel_gt *gt; /* gt for the execbuf */
 	struct intel_context *context; /* logical state for the request */
 	struct i915_gem_context *gem_context; /** caller's context */
+	intel_wakeref_t wakeref;
+	intel_wakeref_t wakeref_gt0;
 
 	/** our requests to build */
 	struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
@@ -1157,7 +1159,7 @@ static void reloc_cache_unmap(struct reloc_cache *cache)
 
 	vaddr = unmask_page(cache->vaddr);
 	if (cache->vaddr & KMAP)
-		kunmap_atomic(vaddr);
+		kunmap_local(vaddr);
 	else
 		io_mapping_unmap_atomic((void __iomem *)vaddr);
 }
@@ -1173,7 +1175,7 @@ static void reloc_cache_remap(struct reloc_cache *cache,
 	if (cache->vaddr & KMAP) {
 		struct page *page = i915_gem_object_get_page(obj, cache->page);
 
-		vaddr = kmap_atomic(page);
+		vaddr = kmap_local_page(page);
 		cache->vaddr = unmask_flags(cache->vaddr) |
 			(unsigned long)vaddr;
 	} else {
@@ -1203,7 +1205,7 @@ static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer
 		if (cache->vaddr & CLFLUSH_AFTER)
 			mb();
 
-		kunmap_atomic(vaddr);
+		kunmap_local(vaddr);
 		i915_gem_object_finish_access(obj);
 	} else {
 		struct i915_ggtt *ggtt = cache_to_ggtt(cache);
@@ -1235,7 +1237,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 	struct page *page;
 
 	if (cache->vaddr) {
-		kunmap_atomic(unmask_page(cache->vaddr));
+		kunmap_local(unmask_page(cache->vaddr));
 	} else {
 		unsigned int flushes;
 		int err;
@@ -1257,7 +1259,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 	if (!obj->mm.dirty)
 		set_page_dirty(page);
 
-	vaddr = kmap_atomic(page);
+	vaddr = kmap_local_page(page);
 	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
 	cache->page = pageno;
 
@@ -1679,7 +1681,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
 		urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
 		size = nreloc * sizeof(*relocs);
 
-		relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+		relocs = kvmalloc_array(1, size, GFP_KERNEL);
 		if (!relocs) {
 			err = -ENOMEM;
 			goto err;
@@ -2720,13 +2722,13 @@ eb_select_engine(struct i915_execbuffer *eb)
 
 	for_each_child(ce, child)
 		intel_context_get(child);
-	intel_gt_pm_get(gt);
+	eb->wakeref = intel_gt_pm_get(ce->engine->gt);
 	/*
 	 * Keep GT0 active on MTL so that i915_vma_parked() doesn't
 	 * free VMAs while execbuf ioctl is validating VMAs.
 	 */
 	if (gt->info.id)
-		intel_gt_pm_get(to_gt(gt->i915));
+		eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915));
 
 	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
 		err = intel_context_alloc_state(ce);
@@ -2766,9 +2768,9 @@ eb_select_engine(struct i915_execbuffer *eb)
 
 err:
 	if (gt->info.id)
-		intel_gt_pm_put(to_gt(gt->i915));
+		intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0);
 
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(ce->engine->gt, eb->wakeref);
 	for_each_child(ce, child)
 		intel_context_put(child);
 	intel_context_put(ce);
@@ -2786,8 +2788,8 @@ eb_put_engine(struct i915_execbuffer *eb)
 	 * i915_vma_parked() from interfering while execbuf validates vmas.
 	 */
 	if (eb->gt->info.id)
-		intel_gt_pm_put(to_gt(eb->gt->i915));
-	intel_gt_pm_put(eb->gt);
+		intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0);
+	intel_gt_pm_put(eb->context->engine->gt, eb->wakeref);
 	for_each_child(eb->context, child)
 		intel_context_put(child);
 	intel_context_put(eb->context);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index c26d87555825..58e6c680fe0d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -106,6 +106,10 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	INIT_LIST_HEAD(&obj->mm.link);
 
+#ifdef CONFIG_PROC_FS
+	INIT_LIST_HEAD(&obj->client_link);
+#endif
+
 	INIT_LIST_HEAD(&obj->lut_list);
 	spin_lock_init(&obj->lut_lock);
 
@@ -293,6 +297,10 @@ void __i915_gem_free_object_rcu(struct rcu_head *head)
 		container_of(head, typeof(*obj), rcu);
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
+	/* We need to keep this alive for RCU read access from fdinfo. */
+	if (obj->mm.n_placements > 1)
+		kfree(obj->mm.placements);
+
 	i915_gem_object_free(obj);
 
 	GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
@@ -389,9 +397,6 @@ void __i915_gem_free_object(struct drm_i915_gem_object *obj)
 	if (obj->ops->release)
 		obj->ops->release(obj);
 
-	if (obj->mm.n_placements > 1)
-		kfree(obj->mm.placements);
-
 	if (obj->shares_resv_from)
 		i915_vm_resv_put(obj->shares_resv_from);
 
@@ -442,6 +447,8 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
 
 	GEM_BUG_ON(i915_gem_object_is_framebuffer(obj));
 
+	i915_drm_client_remove_object(obj);
+
 	/*
 	 * Before we free the object, make sure any pure RCU-only
 	 * read-side critical sections are complete, e.g.
@@ -493,17 +500,15 @@ static void
 i915_gem_object_read_from_page_kmap(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size)
 {
 	pgoff_t idx = offset >> PAGE_SHIFT;
-	void *src_map;
 	void *src_ptr;
 
-	src_map = kmap_atomic(i915_gem_object_get_page(obj, idx));
-
-	src_ptr = src_map + offset_in_page(offset);
+	src_ptr = kmap_local_page(i915_gem_object_get_page(obj, idx))
+	          + offset_in_page(offset);
 	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
 		drm_clflush_virt_range(src_ptr, size);
 	memcpy(dst, src_ptr, size);
 
-	kunmap_atomic(src_map);
+	kunmap_local(src_ptr);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 2292404007c8..0c5cdab278b6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -302,6 +302,18 @@ struct drm_i915_gem_object {
 	 */
 	struct i915_address_space *shares_resv_from;
 
+#ifdef CONFIG_PROC_FS
+	/**
+	 * @client: @i915_drm_client which created the object
+	 */
+	struct i915_drm_client *client;
+
+	/**
+	 * @client_link: Link into @i915_drm_client.objects_list
+	 */
+	struct list_head client_link;
+#endif
+
 	union {
 		struct rcu_head rcu;
 		struct llist_node freed;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 5df128e2f4dc..ef85c6dc9fd5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -65,16 +65,13 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 	dst = vaddr;
 	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
 		struct page *page;
-		void *src;
 
 		page = shmem_read_mapping_page(mapping, i);
 		if (IS_ERR(page))
 			goto err_st;
 
-		src = kmap_atomic(page);
-		memcpy(dst, src, PAGE_SIZE);
+		memcpy_from_page(dst, page, 0, PAGE_SIZE);
 		drm_clflush_virt_range(dst, PAGE_SIZE);
-		kunmap_atomic(src);
 
 		put_page(page);
 		dst += PAGE_SIZE;
@@ -113,16 +110,13 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
 
 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
 			struct page *page;
-			char *dst;
 
 			page = shmem_read_mapping_page(mapping, i);
 			if (IS_ERR(page))
 				continue;
 
-			dst = kmap_atomic(page);
 			drm_clflush_virt_range(src, PAGE_SIZE);
-			memcpy(dst, src, PAGE_SIZE);
-			kunmap_atomic(dst);
+			memcpy_to_page(page, 0, src, PAGE_SIZE);
 
 			set_page_dirty(page);
 			if (obj->mm.madv == I915_MADV_WILLNEED)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 73a4a4eb29e0..38b72d86560f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -485,11 +485,13 @@ shmem_pwrite(struct drm_i915_gem_object *obj,
 		if (err < 0)
 			return err;
 
-		vaddr = kmap_atomic(page);
+		vaddr = kmap_local_page(page);
+		pagefault_disable();
 		unwritten = __copy_from_user_inatomic(vaddr + pg,
 						      user_data,
 						      len);
-		kunmap_atomic(vaddr);
+		pagefault_enable();
+		kunmap_local(vaddr);
 
 		err = aops->write_end(obj->base.filp, mapping, offset, len,
 				      len - unwritten, page, data);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 1a766d8e7cce..8c88075eeab2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -386,6 +386,27 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
 
 	drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
+	/* Wa_14019821291 */
+	if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) {
+		/*
+		 * This workaround is primarily implemented by the BIOS.  We
+		 * just need to figure out whether the BIOS has applied the
+		 * workaround (meaning the programmed address falls within
+		 * the DSM) and, if so, reserve that part of the DSM to
+		 * prevent accidental reuse.  The DSM location should be just
+		 * below the WOPCM.
+		 */
+		u64 gscpsmi_base = intel_uncore_read64_2x32(uncore,
+							    MTL_GSCPSMI_BASEADDR_LSB,
+							    MTL_GSCPSMI_BASEADDR_MSB);
+		if (gscpsmi_base >= i915->dsm.stolen.start &&
+		    gscpsmi_base < i915->dsm.stolen.end) {
+			*base = gscpsmi_base;
+			*size = i915->dsm.stolen.end - gscpsmi_base;
+			return;
+		}
+	}
+
 	switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
 	case GEN8_STOLEN_RESERVED_1M:
 		*size = 1024 * 1024;
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 6b9f6cf50bf6..c9e6d77abab0 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1082,7 +1082,7 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 		goto err_unlock;
 
 	for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
-		u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
+		u32 *ptr = kmap_local_page(i915_gem_object_get_page(obj, n));
 
 		if (needs_flush & CLFLUSH_BEFORE)
 			drm_clflush_virt_range(ptr, PAGE_SIZE);
@@ -1090,12 +1090,12 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 		if (ptr[dword] != val) {
 			pr_err("n=%lu ptr[%u]=%u, val=%u\n",
 			       n, dword, ptr[dword], val);
-			kunmap_atomic(ptr);
+			kunmap_local(ptr);
 			err = -EINVAL;
 			break;
 		}
 
-		kunmap_atomic(ptr);
+		kunmap_local(ptr);
 	}
 
 	i915_gem_object_finish_access(obj);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 3bef1beec7cb..2a0c0634d446 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -24,7 +24,6 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
 {
 	unsigned int needs_clflush;
 	struct page *page;
-	void *map;
 	u32 *cpu;
 	int err;
 
@@ -34,8 +33,7 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
 		goto out;
 
 	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
-	map = kmap_atomic(page);
-	cpu = map + offset_in_page(offset);
+	cpu = kmap_local_page(page) + offset_in_page(offset);
 
 	if (needs_clflush & CLFLUSH_BEFORE)
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
@@ -45,7 +43,7 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
 	if (needs_clflush & CLFLUSH_AFTER)
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
 
-	kunmap_atomic(map);
+	kunmap_local(cpu);
 	i915_gem_object_finish_access(ctx->obj);
 
 out:
@@ -57,7 +55,6 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
 {
 	unsigned int needs_clflush;
 	struct page *page;
-	void *map;
 	u32 *cpu;
 	int err;
 
@@ -67,15 +64,14 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
 		goto out;
 
 	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
-	map = kmap_atomic(page);
-	cpu = map + offset_in_page(offset);
+	cpu = kmap_local_page(page) + offset_in_page(offset);
 
 	if (needs_clflush & CLFLUSH_BEFORE)
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
 
 	*v = *cpu;
 
-	kunmap_atomic(map);
+	kunmap_local(cpu);
 	i915_gem_object_finish_access(ctx->obj);
 
 out:
@@ -85,6 +81,7 @@ out:
 
 static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
 {
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	u32 __iomem *map;
 	int err = 0;
@@ -99,7 +96,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	intel_gt_pm_get(vma->vm->gt);
+	wakeref = intel_gt_pm_get(vma->vm->gt);
 
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
@@ -112,12 +109,13 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
 	i915_vma_unpin_iomap(vma);
 
 out_rpm:
-	intel_gt_pm_put(vma->vm->gt);
+	intel_gt_pm_put(vma->vm->gt, wakeref);
 	return err;
 }
 
 static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
 {
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	u32 __iomem *map;
 	int err = 0;
@@ -132,7 +130,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	intel_gt_pm_get(vma->vm->gt);
+	wakeref = intel_gt_pm_get(vma->vm->gt);
 
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
@@ -145,7 +143,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
 	i915_vma_unpin_iomap(vma);
 
 out_rpm:
-	intel_gt_pm_put(vma->vm->gt);
+	intel_gt_pm_put(vma->vm->gt, wakeref);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 7021b6e9b219..89d4dc8b60c6 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -489,12 +489,12 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	for (n = 0; n < real_page_count(obj); n++) {
 		u32 *map;
 
-		map = kmap_atomic(i915_gem_object_get_page(obj, n));
+		map = kmap_local_page(i915_gem_object_get_page(obj, n));
 		for (m = 0; m < DW_PER_PAGE; m++)
 			map[m] = value;
 		if (!has_llc)
 			drm_clflush_virt_range(map, PAGE_SIZE);
-		kunmap_atomic(map);
+		kunmap_local(map);
 	}
 
 	i915_gem_object_finish_access(obj);
@@ -520,7 +520,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 	for (n = 0; n < real_page_count(obj); n++) {
 		u32 *map, m;
 
-		map = kmap_atomic(i915_gem_object_get_page(obj, n));
+		map = kmap_local_page(i915_gem_object_get_page(obj, n));
 		if (needs_flush & CLFLUSH_BEFORE)
 			drm_clflush_virt_range(map, PAGE_SIZE);
 
@@ -546,7 +546,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 		}
 
 out_unmap:
-		kunmap_atomic(map);
+		kunmap_local(map);
 		if (err)
 			break;
 	}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index e57f9390076c..d684a70f2c04 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -504,7 +504,7 @@ static int igt_dmabuf_export_vmap(void *arg)
 	}
 
 	if (memchr_inv(ptr, 0, dmabuf->size)) {
-		pr_err("Exported object not initialiased to zero!\n");
+		pr_err("Exported object not initialised to zero!\n");
 		err = -EINVAL;
 		goto out;
 	}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 72957a36a36b..2c51a2c452fc 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -630,14 +630,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
 static void disable_retire_worker(struct drm_i915_private *i915)
 {
 	i915_gem_driver_unregister__shrinker(i915);
-	intel_gt_pm_get(to_gt(i915));
+	intel_gt_pm_get_untracked(to_gt(i915));
 	cancel_delayed_work_sync(&to_gt(i915)->requests.retire_work);
 }
 
 static void restore_retire_worker(struct drm_i915_private *i915)
 {
 	igt_flush_test(i915);
-	intel_gt_pm_put(to_gt(i915));
+	intel_gt_pm_put_untracked(to_gt(i915));
 	i915_gem_driver_register__shrinker(i915);
 }
 
@@ -778,6 +778,7 @@ err_obj:
 
 static int gtt_set(struct drm_i915_gem_object *obj)
 {
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	void __iomem *map;
 	int err = 0;
@@ -786,7 +787,7 @@ static int gtt_set(struct drm_i915_gem_object *obj)
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	intel_gt_pm_get(vma->vm->gt);
+	wakeref = intel_gt_pm_get(vma->vm->gt);
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
 	if (IS_ERR(map)) {
@@ -798,12 +799,13 @@ static int gtt_set(struct drm_i915_gem_object *obj)
 	i915_vma_unpin_iomap(vma);
 
 out:
-	intel_gt_pm_put(vma->vm->gt);
+	intel_gt_pm_put(vma->vm->gt, wakeref);
 	return err;
 }
 
 static int gtt_check(struct drm_i915_gem_object *obj)
 {
+	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	void __iomem *map;
 	int err = 0;
@@ -812,7 +814,7 @@ static int gtt_check(struct drm_i915_gem_object *obj)
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	intel_gt_pm_get(vma->vm->gt);
+	wakeref = intel_gt_pm_get(vma->vm->gt);
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
 	if (IS_ERR(map)) {
@@ -828,7 +830,7 @@ static int gtt_check(struct drm_i915_gem_object *obj)
 	i915_vma_unpin_iomap(vma);
 
 out:
-	intel_gt_pm_put(vma->vm->gt);
+	intel_gt_pm_put(vma->vm->gt, wakeref);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index e199d7dbb876..2b0327cc47c2 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -83,7 +83,7 @@ live_context(struct drm_i915_private *i915, struct file *file)
 	int err;
 	u32 id;
 
-	pc = proto_context_create(i915, 0);
+	pc = proto_context_create(fpriv, i915, 0);
 	if (IS_ERR(pc))
 		return ERR_CAST(pc);
 
@@ -152,7 +152,7 @@ kernel_context(struct drm_i915_private *i915,
 	struct i915_gem_context *ctx;
 	struct i915_gem_proto_context *pc;
 
-	pc = proto_context_create(i915, 0);
+	pc = proto_context_create(NULL, i915, 0);
 	if (IS_ERR(pc))
 		return ERR_CAST(pc);
 
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 9895e18df043..fa46d2308b0e 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -5,6 +5,7 @@
 
 #include <linux/log2.h>
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_lmem.h"
 
 #include "gen8_ppgtt.h"
@@ -222,6 +223,9 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 {
 	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 
+	if (vm->rsvd.obj)
+		i915_gem_object_put(vm->rsvd.obj);
+
 	if (intel_vgpu_active(vm->i915))
 		gen8_ppgtt_notify_vgt(ppgtt, false);
 
@@ -950,6 +954,41 @@ err_pd:
 	return ERR_PTR(err);
 }
 
+static int gen8_init_rsvd(struct i915_address_space *vm)
+{
+	struct drm_i915_private *i915 = vm->i915;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int ret;
+
+	/* The memory will be used only by GPU. */
+	obj = i915_gem_object_create_lmem(i915, PAGE_SIZE,
+					  I915_BO_ALLOC_VOLATILE |
+					  I915_BO_ALLOC_GPU_ONLY);
+	if (IS_ERR(obj))
+		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto unref;
+	}
+
+	ret = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+	if (ret)
+		goto unref;
+
+	vm->rsvd.vma = i915_vma_make_unshrinkable(vma);
+	vm->rsvd.obj = obj;
+	vm->total -= vma->node.size;
+	return 0;
+unref:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -1031,6 +1070,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 	if (intel_vgpu_active(gt->i915))
 		gen8_ppgtt_notify_vgt(ppgtt, true);
 
+	err = gen8_init_rsvd(&ppgtt->vm);
+	if (err)
+		goto err_put;
+
 	return ppgtt;
 
 err_put:
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index ecc990ec1b95..d650beb8ed22 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -28,11 +28,14 @@ static void irq_disable(struct intel_breadcrumbs *b)
 
 static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
+	intel_wakeref_t wakeref;
+
 	/*
 	 * Since we are waiting on a request, the GPU should be busy
 	 * and should have its own rpm reference.
 	 */
-	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
+	wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
+	if (GEM_WARN_ON(!wakeref))
 		return;
 
 	/*
@@ -41,7 +44,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 	 * which we can add a new waiter and avoid the cost of re-enabling
 	 * the irq.
 	 */
-	WRITE_ONCE(b->irq_armed, true);
+	WRITE_ONCE(b->irq_armed, wakeref);
 
 	/* Requests may have completed before we could enable the interrupt. */
 	if (!b->irq_enabled++ && b->irq_enable(b))
@@ -61,12 +64,14 @@ static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 
 static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
 {
+	intel_wakeref_t wakeref = b->irq_armed;
+
 	GEM_BUG_ON(!b->irq_enabled);
 	if (!--b->irq_enabled)
 		b->irq_disable(b);
 
-	WRITE_ONCE(b->irq_armed, false);
-	intel_gt_pm_put_async(b->irq_engine->gt);
+	WRITE_ONCE(b->irq_armed, 0);
+	intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
 }
 
 static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 72dfd3748c4c..bdf09fd67b6e 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 
 #include "intel_engine_types.h"
+#include "intel_wakeref.h"
 
 /*
  * Rather than have every client wait upon all user interrupts,
@@ -43,7 +44,7 @@ struct intel_breadcrumbs {
 	spinlock_t irq_lock; /* protects the interrupt from hardirq context */
 	struct irq_work irq_work; /* for use from inside irq_lock */
 	unsigned int irq_enabled;
-	bool irq_armed;
+	intel_wakeref_t irq_armed;
 
 	/* Not all breadcrumbs are attached to physical HW */
 	intel_engine_mask_t	engine_mask;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index a53b26178f0a..a2f1245741bb 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -6,6 +6,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_pm.h"
 
+#include "i915_drm_client.h"
 #include "i915_drv.h"
 #include "i915_trace.h"
 
@@ -50,6 +51,7 @@ intel_context_create(struct intel_engine_cs *engine)
 
 int intel_context_alloc_state(struct intel_context *ce)
 {
+	struct i915_gem_context *ctx;
 	int err = 0;
 
 	if (mutex_lock_interruptible(&ce->pin_mutex))
@@ -66,6 +68,18 @@ int intel_context_alloc_state(struct intel_context *ce)
 			goto unlock;
 
 		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+
+		rcu_read_lock();
+		ctx = rcu_dereference(ce->gem_context);
+		if (ctx && !kref_get_unless_zero(&ctx->ref))
+			ctx = NULL;
+		rcu_read_unlock();
+		if (ctx) {
+			if (ctx->client)
+				i915_drm_client_add_context_objects(ctx->client,
+								    ce);
+			i915_gem_context_put(ctx);
+		}
 	}
 
 unlock:
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index a80e3b7c24ff..25564c01507e 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -212,7 +212,7 @@ static inline void intel_context_enter(struct intel_context *ce)
 		return;
 
 	ce->ops->enter(ce);
-	intel_gt_pm_get(ce->vm->gt);
+	ce->wakeref = intel_gt_pm_get(ce->vm->gt);
 }
 
 static inline void intel_context_mark_active(struct intel_context *ce)
@@ -229,7 +229,7 @@ static inline void intel_context_exit(struct intel_context *ce)
 	if (--ce->active_count)
 		return;
 
-	intel_gt_pm_put_async(ce->vm->gt);
+	intel_gt_pm_put_async(ce->vm->gt, ce->wakeref);
 	ce->ops->exit(ce);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index aceaac28a33e..7eccbd70d89f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -17,6 +17,7 @@
 #include "i915_utils.h"
 #include "intel_engine_types.h"
 #include "intel_sseu.h"
+#include "intel_wakeref.h"
 
 #include "uc/intel_guc_fwif.h"
 
@@ -112,6 +113,7 @@ struct intel_context {
 	u32 ring_size;
 	struct intel_ring *ring;
 	struct intel_timeline *timeline;
+	intel_wakeref_t wakeref;
 
 	unsigned long flags;
 #define CONTEXT_BARRIER_BIT		0
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 4a11219e560e..40687806d22a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -47,7 +47,7 @@
 #define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
 #define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)
 
-#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)
+#define GEN8_LR_CONTEXT_OTHER_SIZE	(2 * PAGE_SIZE)
 
 #define MAX_MMIO_BASES 3
 struct engine_info {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 9a527e1f5be6..1a8e2b7db013 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -188,7 +188,7 @@ static void heartbeat(struct work_struct *wrk)
 			 * low latency and no jitter] the chance to naturally
 			 * complete before being preempted.
 			 */
-			attr.priority = 0;
+			attr.priority = I915_PRIORITY_NORMAL;
 			if (rq->sched.attr.priority >= attr.priority)
 				attr.priority = I915_PRIORITY_HEARTBEAT;
 			if (rq->sched.attr.priority >= attr.priority)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index e91fc881dbf1..96bdb93a948d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -63,7 +63,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
 
 	ENGINE_TRACE(engine, "\n");
 
-	intel_gt_pm_get(engine->gt);
+	engine->wakeref_track = intel_gt_pm_get(engine->gt);
 
 	/* Discard stale context state from across idling */
 	ce = engine->kernel_context;
@@ -122,6 +122,7 @@ __queue_and_release_pm(struct i915_request *rq,
 	 */
 	GEM_BUG_ON(rq->context->active_count != 1);
 	__intel_gt_pm_get(engine->gt);
+	rq->context->wakeref = intel_wakeref_track(&engine->gt->wakeref);
 
 	/*
 	 * We have to serialise all potential retirement paths with our
@@ -285,7 +286,7 @@ static int __engine_park(struct intel_wakeref *wf)
 		engine->park(engine);
 
 	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
-	intel_gt_pm_put_async(engine->gt);
+	intel_gt_pm_put_async(engine->gt, engine->wakeref_track);
 	return 0;
 }
 
@@ -296,7 +297,7 @@ static const struct intel_wakeref_ops wf_ops = {
 
 void intel_engine_init__pm(struct intel_engine_cs *engine)
 {
-	intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops);
+	intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops, engine->name);
 	intel_engine_init_heartbeat(engine);
 
 	intel_gsc_idle_msg_enable(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index fdd4ddd3a978..a8eac59e3779 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -118,9 +118,15 @@
 #define   CCID_EXTENDED_STATE_RESTORE		BIT(2)
 #define   CCID_EXTENDED_STATE_SAVE		BIT(3)
 #define RING_BB_PER_CTX_PTR(base)		_MMIO((base) + 0x1c0) /* gen8+ */
+#define   PER_CTX_BB_FORCE			BIT(2)
+#define   PER_CTX_BB_VALID			BIT(0)
+
 #define RING_INDIRECT_CTX(base)			_MMIO((base) + 0x1c4) /* gen8+ */
 #define RING_INDIRECT_CTX_OFFSET(base)		_MMIO((base) + 0x1c8) /* gen8+ */
 #define ECOSKPD(base)				_MMIO((base) + 0x1d0)
+#define   XEHP_BLITTER_SCHEDULING_MODE_MASK	REG_GENMASK(12, 11)
+#define   XEHP_BLITTER_ROUND_ROBIN_MODE		\
+		REG_FIELD_PREP(XEHP_BLITTER_SCHEDULING_MODE_MASK, 1)
 #define   ECO_CONSTANT_BUFFER_SR_DISABLE	REG_BIT(4)
 #define   ECO_GATING_CX_ONLY			REG_BIT(3)
 #define   GEN6_BLITTER_FBC_NOTIFY		REG_BIT(3)
@@ -257,5 +263,7 @@
 #define VDBOX_CGCTL3F18(base)			_MMIO((base) + 0x3f18)
 #define   ALNUNIT_CLKGATE_DIS			REG_BIT(13)
 
+#define VDBOX_CGCTL3F1C(base)			_MMIO((base) + 0x3f1c)
+#define   MFXPIPE_CLKGATE_DIS			REG_BIT(3)
 
 #endif /* __INTEL_ENGINE_REGS__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 8769760257fd..960e6be2042f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -446,7 +446,9 @@ struct intel_engine_cs {
 	unsigned long serial;
 
 	unsigned long wakeref_serial;
+	intel_wakeref_t wakeref_track;
 	struct intel_wakeref wakeref;
+
 	struct file *default_state;
 
 	struct {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 118164ddbb2e..833987015b8b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -41,12 +41,15 @@ void intel_engine_add_user(struct intel_engine_cs *engine)
 	llist_add(&engine->uabi_llist, &engine->i915->uabi_engines_llist);
 }
 
-static const u8 uabi_classes[] = {
+#define I915_NO_UABI_CLASS ((u16)(-1))
+
+static const u16 uabi_classes[] = {
 	[RENDER_CLASS] = I915_ENGINE_CLASS_RENDER,
 	[COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
 	[VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
 	[VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
 	[COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
+	[OTHER_CLASS] = I915_NO_UABI_CLASS, /* Not exposed to users, no uabi class. */
 };
 
 static int engine_cmp(void *priv, const struct list_head *A,
@@ -200,6 +203,7 @@ static void engine_rename(struct intel_engine_cs *engine, const char *name, u16
 
 void intel_engines_driver_register(struct drm_i915_private *i915)
 {
+	u16 name_instance, other_instance = 0;
 	struct legacy_ring ring = {};
 	struct list_head *it, *next;
 	struct rb_node **p, *prev;
@@ -216,27 +220,28 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
 		if (intel_gt_has_unrecoverable_error(engine->gt))
 			continue; /* ignore incomplete engines */
 
-		/*
-		 * We don't want to expose the GSC engine to the users, but we
-		 * still rename it so it is easier to identify in the debug logs
-		 */
-		if (engine->id == GSC0) {
-			engine_rename(engine, "gsc", 0);
-			continue;
-		}
-
 		GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
 		engine->uabi_class = uabi_classes[engine->class];
+		if (engine->uabi_class == I915_NO_UABI_CLASS) {
+			name_instance = other_instance++;
+		} else {
+			GEM_BUG_ON(engine->uabi_class >=
+				   ARRAY_SIZE(i915->engine_uabi_class_count));
+			name_instance =
+				i915->engine_uabi_class_count[engine->uabi_class]++;
+		}
+		engine->uabi_instance = name_instance;
 
-		GEM_BUG_ON(engine->uabi_class >=
-			   ARRAY_SIZE(i915->engine_uabi_class_count));
-		engine->uabi_instance =
-			i915->engine_uabi_class_count[engine->uabi_class]++;
-
-		/* Replace the internal name with the final user facing name */
+		/*
+		 * Replace the internal name with the final user and log facing
+		 * name.
+		 */
 		engine_rename(engine,
 			      intel_engine_class_repr(engine->class),
-			      engine->uabi_instance);
+			      name_instance);
+
+		if (engine->uabi_class == I915_NO_UABI_CLASS)
+			continue;
 
 		rb_link_node(&engine->uabi_node, prev, p);
 		rb_insert_color(&engine->uabi_node, &i915->uabi_engines);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index e8f42ec6b1b4..42aade0faf2d 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -630,7 +630,7 @@ static void __execlists_schedule_out(struct i915_request * const rq,
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	if (engine->fw_domain && !--engine->fw_active)
 		intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
-	intel_gt_pm_put_async(engine->gt);
+	intel_gt_pm_put_async_untracked(engine->gt);
 
 	/*
 	 * If this is part of a virtual engine, its next request may
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 15fc8e4703f4..21a7e3191c18 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -245,16 +245,15 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 	gen8_ggtt_invalidate(ggtt);
 
 	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
-		if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc)) {
+		if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc))
 			guc_ggtt_ct_invalidate(gt);
-		} else if (GRAPHICS_VER(i915) >= 12) {
+		else if (GRAPHICS_VER(i915) >= 12)
 			intel_uncore_write_fw(gt->uncore,
 					      GEN12_GUC_TLB_INV_CR,
 					      GEN12_GUC_TLB_INV_CR_INVALIDATE);
-		} else {
+		else
 			intel_uncore_write_fw(gt->uncore,
 					      GEN8_GTCR, GEN8_GTCR_INVALIDATE);
-		}
 	}
 }
 
@@ -297,7 +296,7 @@ static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
 	return intel_gt_is_bind_context_ready(gt);
 }
 
-static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
+static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt, intel_wakeref_t *wakeref)
 {
 	struct intel_context *ce;
 	struct intel_gt *gt = ggtt->vm.gt;
@@ -314,7 +313,8 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
 	 * would conflict with fs_reclaim trying to allocate memory while
 	 * doing rpm_resume().
 	 */
-	if (!intel_gt_pm_get_if_awake(gt))
+	*wakeref = intel_gt_pm_get_if_awake(gt);
+	if (!*wakeref)
 		return NULL;
 
 	intel_engine_pm_get(ce->engine);
@@ -322,10 +322,10 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
 	return ce;
 }
 
-static void gen8_ggtt_bind_put_ce(struct intel_context *ce)
+static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref)
 {
 	intel_engine_pm_put(ce->engine);
-	intel_gt_pm_put(ce->engine->gt);
+	intel_gt_pm_put(ce->engine->gt, wakeref);
 }
 
 static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
@@ -338,12 +338,13 @@ static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
 	struct sgt_iter iter;
 	struct i915_request *rq;
 	struct intel_context *ce;
+	intel_wakeref_t wakeref;
 	u32 *cs;
 
 	if (!num_entries)
 		return true;
 
-	ce = gen8_ggtt_bind_get_ce(ggtt);
+	ce = gen8_ggtt_bind_get_ce(ggtt, &wakeref);
 	if (!ce)
 		return false;
 
@@ -419,13 +420,13 @@ queue_err_rq:
 		offset += n_ptes;
 	}
 
-	gen8_ggtt_bind_put_ce(ce);
+	gen8_ggtt_bind_put_ce(ce, wakeref);
 	return true;
 
 err_rq:
 	i915_request_put(rq);
 put_ce:
-	gen8_ggtt_bind_put_ce(ce);
+	gen8_ggtt_bind_put_ce(ce, wakeref);
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.h b/drivers/gpu/drm/i915/gt/intel_gsc.h
index 7ab3ca0f9f26..013c64251448 100644
--- a/drivers/gpu/drm/i915/gt/intel_gsc.h
+++ b/drivers/gpu/drm/i915/gt/intel_gsc.h
@@ -21,8 +21,11 @@ struct mei_aux_device;
 /**
  * struct intel_gsc - graphics security controller
  *
- * @gem_obj: scratch memory GSC operations
- * @intf : gsc interface
+ * @intf: gsc interface
+ * @intf.adev: MEI aux. device for this @intf
+ * @intf.gem_obj: scratch memory GSC operations
+ * @intf.irq: IRQ for this device (%-1 for no IRQ)
+ * @intf.id: this interface's id number/index
  */
 struct intel_gsc {
 	struct intel_gsc_intf {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index ba1186fc524f..a425db5ed3a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -451,7 +451,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
 
 		spin_lock_irqsave(&uncore->lock, flags);
 		intel_uncore_posting_read_fw(uncore,
-					     RING_HEAD(RENDER_RING_BASE));
+					     RING_TAIL(RENDER_RING_BASE));
 		spin_unlock_irqrestore(&uncore->lock, flags);
 	}
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index e1f13735f530..608f5c872928 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -82,6 +82,10 @@ struct drm_printer;
 		  ##__VA_ARGS__);					\
 } while (0)
 
+#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
+	IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \
+	engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
+
 static inline bool gt_is_root(struct intel_gt *gt)
 {
 	return !gt->info.id;
@@ -114,6 +118,11 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc)
 	return container_of(gsc, struct intel_gt, gsc);
 }
 
+static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
+{
+	return guc_to_gt(guc)->i915;
+}
+
 void intel_gt_common_init_early(struct intel_gt *gt);
 int intel_root_gt_init_early(struct drm_i915_private *i915);
 int intel_gt_assign_ggtt(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 34913912d8ae..e253750a51c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -388,8 +388,7 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags)
 		 * registers.  This wakeref will be released in the unlock
 		 * routine.
 		 *
-		 * This is expected to become a formally documented/numbered
-		 * workaround soon.
+		 * Wa_22018931422
 		 */
 		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_GT);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index f5899d503e23..220ac4f92edf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -28,19 +28,20 @@
 static void user_forcewake(struct intel_gt *gt, bool suspend)
 {
 	int count = atomic_read(&gt->user_wakeref);
+	intel_wakeref_t wakeref;
 
 	/* Inside suspend/resume so single threaded, no races to worry about. */
 	if (likely(!count))
 		return;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	if (suspend) {
 		GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
 		atomic_sub(count, &gt->wakeref.count);
 	} else {
 		atomic_add(count, &gt->wakeref.count);
 	}
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 }
 
 static void runtime_begin(struct intel_gt *gt)
@@ -138,7 +139,7 @@ void intel_gt_pm_init_early(struct intel_gt *gt)
 	 * runtime_pm is per-device rather than per-tile, so this is still the
 	 * correct structure.
 	 */
-	intel_wakeref_init(&gt->wakeref, gt->i915, &wf_ops);
+	intel_wakeref_init(&gt->wakeref, gt->i915, &wf_ops, "GT");
 	seqcount_mutex_init(&gt->stats.lock, &gt->wakeref.mutex);
 }
 
@@ -167,7 +168,7 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
 	enum intel_engine_id id;
 	intel_wakeref_t wakeref;
 
-	GT_TRACE(gt, "force:%s", str_yes_no(force));
+	GT_TRACE(gt, "force:%s\n", str_yes_no(force));
 
 	/* Use a raw wakeref to avoid calling intel_display_power_get early */
 	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
@@ -236,6 +237,7 @@ int intel_gt_resume(struct intel_gt *gt)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err;
 
 	err = intel_gt_has_unrecoverable_error(gt);
@@ -252,7 +254,7 @@ int intel_gt_resume(struct intel_gt *gt)
 	 */
 	gt_sanitize(gt, true);
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 
 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 	intel_rc6_sanitize(&gt->rc6);
@@ -295,7 +297,7 @@ int intel_gt_resume(struct intel_gt *gt)
 
 out_fw:
 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 	intel_gt_bind_context_set_ready(gt);
 	return err;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index b1eeb5b33918..911fd0160221 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -16,19 +16,28 @@ static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt)
 	return intel_wakeref_is_active(&gt->wakeref);
 }
 
-static inline void intel_gt_pm_get(struct intel_gt *gt)
+static inline void intel_gt_pm_get_untracked(struct intel_gt *gt)
 {
 	intel_wakeref_get(&gt->wakeref);
 }
 
+static inline intel_wakeref_t intel_gt_pm_get(struct intel_gt *gt)
+{
+	intel_gt_pm_get_untracked(gt);
+	return intel_wakeref_track(&gt->wakeref);
+}
+
 static inline void __intel_gt_pm_get(struct intel_gt *gt)
 {
 	__intel_wakeref_get(&gt->wakeref);
 }
 
-static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
+static inline intel_wakeref_t intel_gt_pm_get_if_awake(struct intel_gt *gt)
 {
-	return intel_wakeref_get_if_active(&gt->wakeref);
+	if (!intel_wakeref_get_if_active(&gt->wakeref))
+		return 0;
+
+	return intel_wakeref_track(&gt->wakeref);
 }
 
 static inline void intel_gt_pm_might_get(struct intel_gt *gt)
@@ -36,12 +45,18 @@ static inline void intel_gt_pm_might_get(struct intel_gt *gt)
 	intel_wakeref_might_get(&gt->wakeref);
 }
 
-static inline void intel_gt_pm_put(struct intel_gt *gt)
+static inline void intel_gt_pm_put_untracked(struct intel_gt *gt)
 {
 	intel_wakeref_put(&gt->wakeref);
 }
 
-static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+static inline void intel_gt_pm_put(struct intel_gt *gt, intel_wakeref_t handle)
+{
+	intel_wakeref_untrack(&gt->wakeref, handle);
+	intel_gt_pm_put_untracked(gt);
+}
+
+static inline void intel_gt_pm_put_async_untracked(struct intel_gt *gt)
 {
 	intel_wakeref_put_async(&gt->wakeref);
 }
@@ -51,9 +66,14 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
 	intel_wakeref_might_put(&gt->wakeref);
 }
 
-#define with_intel_gt_pm(gt, tmp) \
-	for (tmp = 1, intel_gt_pm_get(gt); tmp; \
-	     intel_gt_pm_put(gt), tmp = 0)
+static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t handle)
+{
+	intel_wakeref_untrack(&gt->wakeref, handle);
+	intel_gt_pm_put_async_untracked(gt);
+}
+
+#define with_intel_gt_pm(gt, wf) \
+	for (wf = intel_gt_pm_get(gt); wf; intel_gt_pm_put(gt, wf), wf = 0)
 
 /**
  * with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent
@@ -64,7 +84,7 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
  * @wf: pointer to a temporary wakeref.
  */
 #define with_intel_gt_pm_if_awake(gt, wf) \
-	for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
+	for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt, wf), wf = 0)
 
 static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index f900cc68d6d9..7114c116e928 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -27,7 +27,7 @@
 void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
 {
 	atomic_inc(&gt->user_wakeref);
-	intel_gt_pm_get(gt);
+	intel_gt_pm_get_untracked(gt);
 	if (GRAPHICS_VER(gt->i915) >= 6)
 		intel_uncore_forcewake_user_get(gt->uncore);
 }
@@ -36,7 +36,7 @@ void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
 {
 	if (GRAPHICS_VER(gt->i915) >= 6)
 		intel_uncore_forcewake_user_put(gt->uncore);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put_untracked(gt);
 	atomic_dec(&gt->user_wakeref);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index eecd0a87a647..50962cfd1353 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -469,6 +469,9 @@
 #define XEHP_PSS_MODE2				MCR_REG(0x703c)
 #define   SCOREBOARD_STALL_FLUSH_CONTROL	REG_BIT(5)
 
+#define XEHP_PSS_CHICKEN			MCR_REG(0x7044)
+#define   FD_END_COLLECT			REG_BIT(5)
+
 #define GEN7_SC_INSTDONE			_MMIO(0x7100)
 #define GEN12_SC_INSTDONE_EXTRA			_MMIO(0x7104)
 #define GEN12_SC_INSTDONE_EXTRA2		_MMIO(0x7108)
@@ -537,6 +540,9 @@
 #define XEHP_SQCM				MCR_REG(0x8724)
 #define   EN_32B_ACCESS				REG_BIT(30)
 
+#define MTL_GSCPSMI_BASEADDR_LSB		_MMIO(0x880c)
+#define MTL_GSCPSMI_BASEADDR_MSB		_MMIO(0x8810)
+
 #define HSW_IDICR				_MMIO(0x9008)
 #define   IDIHASHMSK(x)				(((x) & 0x3f) << 16)
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 4fbed27ef0ec..86f73fe558ca 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -63,6 +63,9 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
 	if (!IS_ERR(obj)) {
 		obj->base.resv = i915_vm_resv_get(vm);
 		obj->shares_resv_from = vm;
+
+		if (vm->fpriv)
+			i915_drm_client_add_object(vm->fpriv->client, obj);
 	}
 
 	return obj;
@@ -84,6 +87,9 @@ struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
 	if (!IS_ERR(obj)) {
 		obj->base.resv = i915_vm_resv_get(vm);
 		obj->shares_resv_from = vm;
+
+		if (vm->fpriv)
+			i915_drm_client_add_object(vm->fpriv->client, obj);
 	}
 
 	return obj;
@@ -95,6 +101,16 @@ int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
 	void *vaddr;
 
 	type = intel_gt_coherent_map_type(vm->gt, obj, true);
+	/*
+	 * FIXME: It is suspected that some Address Translation Service (ATS)
+	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
+	 * Applying a write barrier to the ppgtt set entry functions appeared
+	 * to have no effect, so we must temporarily use I915_MAP_WC here on
+	 * MTL until a proper ATS solution is found.
+	 */
+	if (IS_METEORLAKE(vm->i915))
+		type = I915_MAP_WC;
+
 	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
@@ -109,6 +125,16 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object
 	void *vaddr;
 
 	type = intel_gt_coherent_map_type(vm->gt, obj, true);
+	/*
+	 * FIXME: It is suspected that some Address Translation Service (ATS)
+	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
+	 * Applying a write barrier to the ppgtt set entry functions appeared
+	 * to have no effect, so we must temporarily use I915_MAP_WC here on
+	 * MTL until a proper ATS solution is found.
+	 */
+	if (IS_METEORLAKE(vm->i915))
+		type = I915_MAP_WC;
+
 	vaddr = i915_gem_object_pin_map(obj, type);
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index b471edac2699..6b85222ee3ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -249,8 +249,13 @@ struct i915_address_space {
 	struct work_struct release_work;
 
 	struct drm_mm mm;
+	struct {
+		struct drm_i915_gem_object *obj;
+		struct i915_vma *vma;
+	} rsvd;
 	struct intel_gt *gt;
 	struct drm_i915_private *i915;
+	struct drm_i915_file_private *fpriv;
 	struct device *dma;
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 	u64 reserved;		/* size addr space reserved */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index eaf66d903166..7c367ba8d9dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -829,6 +829,18 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
 }
 
 static void
+lrc_setup_bb_per_ctx(u32 *regs,
+		     const struct intel_engine_cs *engine,
+		     u32 ctx_bb_ggtt_addr)
+{
+	GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
+	regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
+		ctx_bb_ggtt_addr |
+		PER_CTX_BB_FORCE |
+		PER_CTX_BB_VALID;
+}
+
+static void
 lrc_setup_indirect_ctx(u32 *regs,
 		       const struct intel_engine_cs *engine,
 		       u32 ctx_bb_ggtt_addr,
@@ -1020,7 +1032,13 @@ static u32 context_wa_bb_offset(const struct intel_context *ce)
 	return PAGE_SIZE * ce->wa_bb_page;
 }
 
-static u32 *context_indirect_bb(const struct intel_context *ce)
+/*
+ * per_ctx below determines which WABB section is used.
+ * When true, the function returns the location of the
+ * PER_CTX_BB.  When false, the function returns the
+ * location of the INDIRECT_CTX.
+ */
+static u32 *context_wabb(const struct intel_context *ce, bool per_ctx)
 {
 	void *ptr;
 
@@ -1029,6 +1047,7 @@ static u32 *context_indirect_bb(const struct intel_context *ce)
 	ptr = ce->lrc_reg_state;
 	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
 	ptr += context_wa_bb_offset(ce);
+	ptr += per_ctx ? PAGE_SIZE : 0;
 
 	return ptr;
 }
@@ -1105,7 +1124,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
 
 	if (GRAPHICS_VER(engine->i915) >= 12) {
 		ce->wa_bb_page = context_size / PAGE_SIZE;
-		context_size += PAGE_SIZE;
+		/* INDIRECT_CTX and PER_CTX_BB need separate pages. */
+		context_size += PAGE_SIZE * 2;
 	}
 
 	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
@@ -1407,12 +1427,85 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
 	return gen12_emit_aux_table_inv(ce->engine, cs);
 }
 
+static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs)
+{
+	struct intel_gt *gt = ce->engine->gt;
+	int mocs = gt->mocs.uc_index << 1;
+
+	/**
+	 * Wa_16018031267 / Wa_16018063123 requires that SW forces the
+	 * main copy engine arbitration into round robin mode.  We
+	 * additionally need to submit the following WABB blt command
+	 * to produce 4 subblits with each subblit generating 0 byte
+	 * write requests as WABB:
+	 *
+	 * XY_FASTCOLOR_BLT
+	 *  BG0    -> 5100000E
+	 *  BG1    -> 0000003F (Dest pitch)
+	 *  BG2    -> 00000000 (X1, Y1) = (0, 0)
+	 *  BG3    -> 00040001 (X2, Y2) = (1, 4)
+	 *  BG4    -> scratch
+	 *  BG5    -> scratch
+	 *  BG6-12 -> 00000000
+	 *  BG13   -> 20004004 (Surf. Width= 2,Surf. Height = 5 )
+	 *  BG14   -> 00000010 (Qpitch = 4)
+	 *  BG15   -> 00000000
+	 */
+	*cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2);
+	*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f;
+	*cs++ = 0;
+	*cs++ = 4 << 16 | 1;
+	*cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
+	*cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0x20004004;
+	*cs++ = 0x10;
+	*cs++ = 0;
+
+	return cs;
+}
+
+static u32 *
+xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs)
+{
+	/* Wa_16018031267, Wa_16018063123 */
+	if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine))
+		cs = xehp_emit_fastcolor_blt_wabb(ce, cs);
+
+	return cs;
+}
+
+static void
+setup_per_ctx_bb(const struct intel_context *ce,
+		 const struct intel_engine_cs *engine,
+		 u32 *(*emit)(const struct intel_context *, u32 *))
+{
+	/* Place PER_CTX_BB on next page after INDIRECT_CTX */
+	u32 * const start = context_wabb(ce, true);
+	u32 *cs;
+
+	cs = emit(ce, start);
+
+	/* PER_CTX_BB must manually terminate */
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
+	lrc_setup_bb_per_ctx(ce->lrc_reg_state, engine,
+			     lrc_indirect_bb(ce) + PAGE_SIZE);
+}
+
 static void
 setup_indirect_ctx_bb(const struct intel_context *ce,
 		      const struct intel_engine_cs *engine,
 		      u32 *(*emit)(const struct intel_context *, u32 *))
 {
-	u32 * const start = context_indirect_bb(ce);
+	u32 * const start = context_wabb(ce, false);
 	u32 *cs;
 
 	cs = emit(ce, start);
@@ -1511,6 +1604,7 @@ u32 lrc_update_regs(const struct intel_context *ce,
 		/* Mutually exclusive wrt to global indirect bb */
 		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
 		setup_indirect_ctx_bb(ce, engine, fn);
+		setup_per_ctx_bb(ce, engine, xehp_emit_per_ctx_bb);
 	}
 
 	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index d5ed904f355d..6801f8b95c53 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1293,7 +1293,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
 	if (msg)
 		drm_notice(&engine->i915->drm,
 			   "Resetting %s for %s\n", engine->name, msg);
-	atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
+	i915_increase_reset_engine_count(&engine->i915->gpu_error, engine);
 
 	ret = intel_gt_reset_engine(engine);
 	if (ret) {
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index f602895f6d0d..6a3246240e81 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -849,13 +849,12 @@ void intel_sseu_print_topology(struct drm_i915_private *i915,
 			       const struct sseu_dev_info *sseu,
 			       struct drm_printer *p)
 {
-	if (sseu->max_slices == 0) {
+	if (sseu->max_slices == 0)
 		drm_printf(p, "Unavailable\n");
-	} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
 		sseu_print_xehp_topology(sseu, p);
-	} else {
+	else
 		sseu_print_hsw_topology(sseu, p);
-	}
 }
 
 void intel_sseu_print_ss_info(const char *type,
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 192ac0e59afa..3eacbc50caf8 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -777,6 +777,9 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
 
 	/* Wa_18019271663:dg2 */
 	wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
+
+	/* Wa_14019877138:dg2 */
+	wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
 }
 
 static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
@@ -1663,8 +1666,22 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 }
 
 static void
+wa_16021867713(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+	struct intel_engine_cs *engine;
+	int id;
+
+	for_each_engine(engine, gt, id)
+		if (engine->class == VIDEO_DECODE_CLASS)
+			wa_write_or(wal, VDBOX_CGCTL3F1C(engine->mmio_base),
+				    MFXPIPE_CLKGATE_DIS);
+}
+
+static void
 xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 {
+	wa_16021867713(gt, wal);
+
 	/*
 	 * Wa_14018778641
 	 * Wa_18018781329
@@ -1674,6 +1691,9 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	 */
 	wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
 
+	/* Wa_22016670082 */
+	wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
+
 	debug_dump_steering(gt);
 }
 
@@ -2340,14 +2360,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 			   0, true);
 	}
 
-	if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) {
-		/* Wa_22014600077:dg2 */
-		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
-			   _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
-			   0 /* Wa_14012342262 write-only reg, so skip verification */,
-			   true);
-	}
-
 	if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
 	    IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
 		/*
@@ -2782,6 +2794,11 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 			 RING_SEMA_WAIT_POLL(engine->mmio_base),
 			 1);
 	}
+	/* Wa_16018031267, Wa_16018063123 */
+	if (NEEDS_FASTCOLOR_BLT_WABB(engine))
+		wa_masked_field_set(wal, ECOSKPD(engine->mmio_base),
+				    XEHP_BLITTER_SCHEDULING_MODE_MASK,
+				    XEHP_BLITTER_ROUND_ROBIN_MODE);
 }
 
 static void
@@ -2915,6 +2932,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 		 * Wa_22015475538:dg2
 		 */
 		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+
+		/* Wa_18028616096 */
+		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
 	}
 
 	if (IS_DG2_G11(i915)) {
@@ -2943,11 +2963,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 			   true);
 	}
 
-	if (IS_DG2_G10(i915) || IS_DG2_G12(i915)) {
-		/* Wa_18028616096 */
-		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
-	}
-
 	if (IS_XEHPSDV(i915)) {
 		/* Wa_1409954639 */
 		wa_mcr_masked_en(wal,
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 86cecf7a1105..5ffa5e30f419 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -21,20 +21,22 @@ static int cmp_u32(const void *A, const void *B)
 	return *a - *b;
 }
 
-static void perf_begin(struct intel_gt *gt)
+static intel_wakeref_t perf_begin(struct intel_gt *gt)
 {
-	intel_gt_pm_get(gt);
+	intel_wakeref_t wakeref = intel_gt_pm_get(gt);
 
 	/* Boost gpufreq to max [waitboost] and keep it fixed */
 	atomic_inc(&gt->rps.num_waiters);
 	queue_work(gt->i915->unordered_wq, &gt->rps.work);
 	flush_work(&gt->rps.work);
+
+	return wakeref;
 }
 
-static int perf_end(struct intel_gt *gt)
+static int perf_end(struct intel_gt *gt, intel_wakeref_t wakeref)
 {
 	atomic_dec(&gt->rps.num_waiters);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	return igt_flush_test(gt->i915);
 }
@@ -133,12 +135,13 @@ static int perf_mi_bb_start(void *arg)
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
 		return 0;
 
-	perf_begin(gt);
+	wakeref = perf_begin(gt);
 	for_each_engine(engine, gt, id) {
 		struct intel_context *ce = engine->kernel_context;
 		struct i915_vma *batch;
@@ -207,7 +210,7 @@ out:
 		pr_info("%s: MI_BB_START cycles: %u\n",
 			engine->name, trifilter(cycles));
 	}
-	if (perf_end(gt))
+	if (perf_end(gt, wakeref))
 		err = -EIO;
 
 	return err;
@@ -260,12 +263,13 @@ static int perf_mi_noop(void *arg)
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
 		return 0;
 
-	perf_begin(gt);
+	wakeref = perf_begin(gt);
 	for_each_engine(engine, gt, id) {
 		struct intel_context *ce = engine->kernel_context;
 		struct i915_vma *base, *nop;
@@ -364,7 +368,7 @@ out:
 		pr_info("%s: 16K MI_NOOP cycles: %u\n",
 			engine->name, trifilter(cycles));
 	}
-	if (perf_end(gt))
+	if (perf_end(gt, wakeref))
 		err = -EIO;
 
 	return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 273d440a53e3..bc441ce7b380 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -84,7 +84,7 @@ static struct pulse *pulse_create(void)
 
 static void pulse_unlock_wait(struct pulse *p)
 {
-	i915_active_unlock_wait(&p->active);
+	wait_var_event_timeout(&p->active, i915_active_is_idle(&p->active), HZ);
 }
 
 static int __live_idle_pulse(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 0971241707ce..33351deeea4f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -81,6 +81,7 @@ static int live_gt_clocks(void *arg)
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	if (!gt->clock_frequency) { /* unknown */
@@ -91,7 +92,7 @@ static int live_gt_clocks(void *arg)
 	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
 		return 0;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 
 	for_each_engine(engine, gt, id) {
@@ -128,7 +129,7 @@ static int live_gt_clocks(void *arg)
 	}
 
 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 5f826b6dcf5d..e17b8777d21d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1555,7 +1555,7 @@ static int live_lrc_isolation(void *arg)
 	return err;
 }
 
-static int indirect_ctx_submit_req(struct intel_context *ce)
+static int wabb_ctx_submit_req(struct intel_context *ce)
 {
 	struct i915_request *rq;
 	int err = 0;
@@ -1579,7 +1579,8 @@ static int indirect_ctx_submit_req(struct intel_context *ce)
 #define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
 
 static u32 *
-emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+emit_wabb_ctx_canary(const struct intel_context *ce,
+		     u32 *cs, bool per_ctx)
 {
 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
 		MI_SRM_LRM_GLOBAL_GTT |
@@ -1587,26 +1588,43 @@ emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
 	*cs++ = i915_mmio_reg_offset(RING_START(0));
 	*cs++ = i915_ggtt_offset(ce->state) +
 		context_wa_bb_offset(ce) +
-		CTX_BB_CANARY_OFFSET;
+		CTX_BB_CANARY_OFFSET +
+		(per_ctx ? PAGE_SIZE : 0);
 	*cs++ = 0;
 
 	return cs;
 }
 
+static u32 *
+emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+	return emit_wabb_ctx_canary(ce, cs, false);
+}
+
+static u32 *
+emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+	return emit_wabb_ctx_canary(ce, cs, true);
+}
+
 static void
-indirect_ctx_bb_setup(struct intel_context *ce)
+wabb_ctx_setup(struct intel_context *ce, bool per_ctx)
 {
-	u32 *cs = context_indirect_bb(ce);
+	u32 *cs = context_wabb(ce, per_ctx);
 
 	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
 
-	setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
+	if (per_ctx)
+		setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary);
+	else
+		setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
 }
 
-static bool check_ring_start(struct intel_context *ce)
+static bool check_ring_start(struct intel_context *ce, bool per_ctx)
 {
 	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
-		LRC_STATE_OFFSET + context_wa_bb_offset(ce);
+		LRC_STATE_OFFSET + context_wa_bb_offset(ce) +
+		(per_ctx ? PAGE_SIZE : 0);
 
 	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
 		return true;
@@ -1618,21 +1636,21 @@ static bool check_ring_start(struct intel_context *ce)
 	return false;
 }
 
-static int indirect_ctx_bb_check(struct intel_context *ce)
+static int wabb_ctx_check(struct intel_context *ce, bool per_ctx)
 {
 	int err;
 
-	err = indirect_ctx_submit_req(ce);
+	err = wabb_ctx_submit_req(ce);
 	if (err)
 		return err;
 
-	if (!check_ring_start(ce))
+	if (!check_ring_start(ce, per_ctx))
 		return -EINVAL;
 
 	return 0;
 }
 
-static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
+static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx)
 {
 	struct intel_context *a, *b;
 	int err;
@@ -1667,14 +1685,14 @@ static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
 	 * As ring start is restored apriori of starting the indirect ctx bb and
 	 * as it will be different for each context, it fits to this purpose.
 	 */
-	indirect_ctx_bb_setup(a);
-	indirect_ctx_bb_setup(b);
+	wabb_ctx_setup(a, per_ctx);
+	wabb_ctx_setup(b, per_ctx);
 
-	err = indirect_ctx_bb_check(a);
+	err = wabb_ctx_check(a, per_ctx);
 	if (err)
 		goto unpin_b;
 
-	err = indirect_ctx_bb_check(b);
+	err = wabb_ctx_check(b, per_ctx);
 
 unpin_b:
 	intel_context_unpin(b);
@@ -1688,7 +1706,7 @@ put_a:
 	return err;
 }
 
-static int live_lrc_indirect_ctx_bb(void *arg)
+static int lrc_wabb_ctx(void *arg, bool per_ctx)
 {
 	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
@@ -1697,7 +1715,7 @@ static int live_lrc_indirect_ctx_bb(void *arg)
 
 	for_each_engine(engine, gt, id) {
 		intel_engine_pm_get(engine);
-		err = __live_lrc_indirect_ctx_bb(engine);
+		err = __lrc_wabb_ctx(engine, per_ctx);
 		intel_engine_pm_put(engine);
 
 		if (igt_flush_test(gt->i915))
@@ -1710,6 +1728,16 @@ static int live_lrc_indirect_ctx_bb(void *arg)
 	return err;
 }
 
+static int live_lrc_indirect_ctx_bb(void *arg)
+{
+	return lrc_wabb_ctx(arg, false);
+}
+
+static int live_lrc_per_ctx_bb(void *arg)
+{
+	return lrc_wabb_ctx(arg, true);
+}
+
 static void garbage_reset(struct intel_engine_cs *engine,
 			  struct i915_request *rq)
 {
@@ -1947,6 +1975,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_lrc_garbage),
 		SUBTEST(live_pphwsp_runtime),
 		SUBTEST(live_lrc_indirect_ctx_bb),
+		SUBTEST(live_lrc_per_ctx_bb),
 	};
 
 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 79aa6ac66ad2..f40de408cd3a 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -261,11 +261,12 @@ static int igt_atomic_reset(void *arg)
 {
 	struct intel_gt *gt = arg;
 	const typeof(*igt_atomic_phases) *p;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that the resets are usable from atomic context */
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	igt_global_reset_lock(gt);
 
 	/* Flush any requests before we get started and check basics */
@@ -296,7 +297,7 @@ static int igt_atomic_reset(void *arg)
 
 unlock:
 	igt_global_reset_unlock(gt);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	return err;
 }
@@ -307,6 +308,7 @@ static int igt_atomic_engine_reset(void *arg)
 	const typeof(*igt_atomic_phases) *p;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that the resets are usable from atomic context */
@@ -317,7 +319,7 @@ static int igt_atomic_engine_reset(void *arg)
 	if (intel_uc_uses_guc_submission(&gt->uc))
 		return 0;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	igt_global_reset_lock(gt);
 
 	/* Flush any requests before we get started and check basics */
@@ -365,7 +367,7 @@ static int igt_atomic_engine_reset(void *arg)
 
 out_unlock:
 	igt_global_reset_unlock(gt);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index fb30f733b036..dcef8d498919 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -224,6 +224,7 @@ int live_rps_clock_interval(void *arg)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	struct igt_spinner spin;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
@@ -236,7 +237,7 @@ int live_rps_clock_interval(void *arg)
 	saved_work = rps->work.func;
 	rps->work.func = dummy_rps_work;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	intel_rps_disable(&gt->rps);
 
 	intel_gt_check_clock_frequency(gt);
@@ -355,7 +356,7 @@ int live_rps_clock_interval(void *arg)
 	}
 
 	intel_rps_enable(&gt->rps);
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	igt_spinner_fini(&spin);
 
@@ -376,6 +377,7 @@ int live_rps_control(void *arg)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	struct igt_spinner spin;
+	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/*
@@ -398,7 +400,7 @@ int live_rps_control(void *arg)
 	saved_work = rps->work.func;
 	rps->work.func = dummy_rps_work;
 
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	for_each_engine(engine, gt, id) {
 		struct i915_request *rq;
 		ktime_t min_dt, max_dt;
@@ -488,7 +490,7 @@ int live_rps_control(void *arg)
 			break;
 		}
 	}
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 
 	igt_spinner_fini(&spin);
 
@@ -1023,6 +1025,7 @@ int live_rps_interrupt(void *arg)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	struct igt_spinner spin;
+	intel_wakeref_t wakeref;
 	u32 pm_events;
 	int err = 0;
 
@@ -1033,9 +1036,9 @@ int live_rps_interrupt(void *arg)
 	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
 		return 0;
 
-	intel_gt_pm_get(gt);
-	pm_events = rps->pm_events;
-	intel_gt_pm_put(gt);
+	pm_events = 0;
+	with_intel_gt_pm(gt, wakeref)
+		pm_events = rps->pm_events;
 	if (!pm_events) {
 		pr_err("No RPS PM events registered, but RPS is enabled?\n");
 		return -ENODEV;
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index 952c8d52d68a..302d0540295d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -266,6 +266,7 @@ static int run_test(struct intel_gt *gt, int test_type)
 	struct intel_rps *rps = &gt->rps;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
 	struct igt_spinner spin;
 	u32 slpc_min_freq, slpc_max_freq;
 	int err = 0;
@@ -311,7 +312,7 @@ static int run_test(struct intel_gt *gt, int test_type)
 	}
 
 	intel_gt_pm_wait_for_idle(gt);
-	intel_gt_pm_get(gt);
+	wakeref = intel_gt_pm_get(gt);
 	for_each_engine(engine, gt, id) {
 		struct i915_request *rq;
 		u32 max_act_freq;
@@ -397,7 +398,7 @@ static int run_test(struct intel_gt *gt, int test_type)
 	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
-	intel_gt_pm_put(gt);
+	intel_gt_pm_put(gt, wakeref);
 	igt_spinner_fini(&spin);
 	intel_gt_pm_wait_for_idle(gt);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
index 5f138de3c14f..40817ebcca71 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
@@ -322,6 +322,7 @@ static int i915_gsc_proxy_component_bind(struct device *i915_kdev,
 	gsc->proxy.component = data;
 	gsc->proxy.component->mei_dev = mei_kdev;
 	mutex_unlock(&gsc->proxy.mutex);
+	gt_dbg(gt, "GSC proxy mei component bound\n");
 
 	return 0;
 }
@@ -342,6 +343,7 @@ static void i915_gsc_proxy_component_unbind(struct device *i915_kdev,
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE),
 				 HECI_H_CSR_IE | HECI_H_CSR_RST, 0);
+	gt_dbg(gt, "GSC proxy mei component unbound\n");
 }
 
 static const struct component_ops i915_gsc_proxy_component_ops = {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 3f3df1166b86..2b450c43bbd7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -330,7 +330,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
 
 static u32 guc_ctl_devid(struct intel_guc *guc)
 {
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	return (INTEL_DEVID(i915) << 16) | INTEL_REVID(i915);
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 2b6dfe62c8f2..813cc888e6fa 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -105,61 +105,67 @@ struct intel_guc {
 	 */
 	struct {
 		/**
-		 * @lock: protects everything in submission_state,
-		 * ce->guc_id.id, and ce->guc_id.ref when transitioning in and
-		 * out of zero
+		 * @submission_state.lock: protects everything in
+		 * submission_state, ce->guc_id.id, and ce->guc_id.ref
+		 * when transitioning in and out of zero
 		 */
 		spinlock_t lock;
 		/**
-		 * @guc_ids: used to allocate new guc_ids, single-lrc
+		 * @submission_state.guc_ids: used to allocate new
+		 * guc_ids, single-lrc
 		 */
 		struct ida guc_ids;
 		/**
-		 * @num_guc_ids: Number of guc_ids, selftest feature to be able
-		 * to reduce this number while testing.
+		 * @submission_state.num_guc_ids: Number of guc_ids, selftest
+		 * feature to be able to reduce this number while testing.
 		 */
 		int num_guc_ids;
 		/**
-		 * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
+		 * @submission_state.guc_ids_bitmap: used to allocate
+		 * new guc_ids, multi-lrc
 		 */
 		unsigned long *guc_ids_bitmap;
 		/**
-		 * @guc_id_list: list of intel_context with valid guc_ids but no
-		 * refs
+		 * @submission_state.guc_id_list: list of intel_context
+		 * with valid guc_ids but no refs
 		 */
 		struct list_head guc_id_list;
 		/**
-		 * @guc_ids_in_use: Number single-lrc guc_ids in use
+		 * @submission_state.guc_ids_in_use: Number single-lrc
+		 * guc_ids in use
 		 */
 		unsigned int guc_ids_in_use;
 		/**
-		 * @destroyed_contexts: list of contexts waiting to be destroyed
-		 * (deregistered with the GuC)
+		 * @submission_state.destroyed_contexts: list of contexts
+		 * waiting to be destroyed (deregistered with the GuC)
 		 */
 		struct list_head destroyed_contexts;
 		/**
-		 * @destroyed_worker: worker to deregister contexts, need as we
-		 * need to take a GT PM reference and can't from destroy
-		 * function as it might be in an atomic context (no sleeping)
+		 * @submission_state.destroyed_worker: worker to deregister
+		 * contexts, need as we need to take a GT PM reference and
+		 * can't from destroy function as it might be in an atomic
+		 * context (no sleeping)
 		 */
 		struct work_struct destroyed_worker;
 		/**
-		 * @reset_fail_worker: worker to trigger a GT reset after an
-		 * engine reset fails
+		 * @submission_state.reset_fail_worker: worker to trigger
+		 * a GT reset after an engine reset fails
 		 */
 		struct work_struct reset_fail_worker;
 		/**
-		 * @reset_fail_mask: mask of engines that failed to reset
+		 * @submission_state.reset_fail_mask: mask of engines that
+		 * failed to reset
 		 */
 		intel_engine_mask_t reset_fail_mask;
 		/**
-		 * @sched_disable_delay_ms: schedule disable delay, in ms, for
-		 * contexts
+		 * @submission_state.sched_disable_delay_ms: schedule
+		 * disable delay, in ms, for contexts
 		 */
 		unsigned int sched_disable_delay_ms;
 		/**
-		 * @sched_disable_gucid_threshold: threshold of min remaining available
-		 * guc_ids before we start bypassing the schedule disable delay
+		 * @submission_state.sched_disable_gucid_threshold:
+		 * threshold of min remaining available guc_ids before
+		 * we start bypassing the schedule disable delay
 		 */
 		unsigned int sched_disable_gucid_threshold;
 	} submission_state;
@@ -243,37 +249,40 @@ struct intel_guc {
 	 */
 	struct {
 		/**
-		 * @lock: Lock protecting the below fields and the engine stats.
+		 * @timestamp.lock: Lock protecting the below fields and
+		 * the engine stats.
 		 */
 		spinlock_t lock;
 
 		/**
-		 * @gt_stamp: 64 bit extended value of the GT timestamp.
+		 * @timestamp.gt_stamp: 64-bit extended value of the GT
+		 * timestamp.
 		 */
 		u64 gt_stamp;
 
 		/**
-		 * @ping_delay: Period for polling the GT timestamp for
-		 * overflow.
+		 * @timestamp.ping_delay: Period for polling the GT
+		 * timestamp for overflow.
 		 */
 		unsigned long ping_delay;
 
 		/**
-		 * @work: Periodic work to adjust GT timestamp, engine and
-		 * context usage for overflows.
+		 * @timestamp.work: Periodic work to adjust GT timestamp,
+		 * engine and context usage for overflows.
 		 */
 		struct delayed_work work;
 
 		/**
-		 * @shift: Right shift value for the gpm timestamp
+		 * @timestamp.shift: Right shift value for the gpm timestamp
 		 */
 		u32 shift;
 
 		/**
-		 * @last_stat_jiffies: jiffies at last actual stats collection time
-		 * We use this timestamp to ensure we don't oversample the
-		 * stats because runtime power management events can trigger
-		 * stats collection at much higher rates than required.
+		 * @timestamp.last_stat_jiffies: jiffies at last actual
+		 * stats collection time. We use this timestamp to ensure
+		 * we don't oversample the stats because runtime power
+		 * management events can trigger stats collection at much
+		 * higher rates than required.
 		 */
 		unsigned long last_stat_jiffies;
 	} timestamp;
@@ -297,6 +306,10 @@ struct intel_guc {
 	 * @number_guc_id_stolen: The number of guc_ids that have been stolen
 	 */
 	int number_guc_id_stolen;
+	/**
+	 * @fast_response_selftest: Backdoor to CT handler for fast response selftest
+	 */
+	u32 fast_response_selftest;
 #endif
 };
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
index a4da0208c883..a1cd40d80517 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -355,7 +355,7 @@ guc_capture_alloc_steered_lists(struct intel_guc *guc,
 static const struct __guc_mmio_reg_descr_group *
 guc_capture_get_device_reglist(struct intel_guc *guc)
 {
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 	const struct __guc_mmio_reg_descr_group *lists;
 
 	if (GRAPHICS_VER(i915) >= 12)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 89e314b3756b..0d5197c0824a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -265,7 +265,7 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
 	u32 *cmds;
 	int err;
 
-	err = i915_inject_probe_error(guc_to_gt(guc)->i915, -ENXIO);
+	err = i915_inject_probe_error(guc_to_i915(guc), -ENXIO);
 	if (err)
 		return err;
 
@@ -1076,6 +1076,15 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 		found = true;
 		break;
 	}
+
+#ifdef CONFIG_DRM_I915_SELFTEST
+	if (!found && ct_to_guc(ct)->fast_response_selftest) {
+		CT_DEBUG(ct, "Assuming unsolicited response due to FAST_REQUEST selftest\n");
+		ct_to_guc(ct)->fast_response_selftest++;
+		found = true;
+	}
+#endif
+
 	if (!found) {
 		CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
 			 len, hxg[0], fence, ct->requests.last_fence);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 55bc8b55fbc0..bf16351c9349 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -520,7 +520,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log)
 static int guc_log_relay_create(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 	struct rchan *guc_log_relay_chan;
 	size_t n_subbufs, subbuf_size;
 	int ret;
@@ -573,7 +573,7 @@ static void guc_log_relay_destroy(struct intel_guc_log *log)
 static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 	intel_wakeref_t wakeref;
 
 	_guc_log_copy_debuglogs_for_relay(log);
@@ -589,7 +589,7 @@ static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
 static u32 __get_default_log_level(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	/* A negative value means "use platform/config default" */
 	if (i915->params.guc_log_level < 0) {
@@ -664,7 +664,7 @@ void intel_guc_log_destroy(struct intel_guc_log *log)
 int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 	intel_wakeref_t wakeref;
 	int ret = 0;
 
@@ -796,7 +796,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
 static void guc_log_relay_stop(struct intel_guc_log *log)
 {
 	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	if (!log->relay.started)
 		return;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
index 1adec6de223c..9df7927304ae 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
@@ -14,7 +14,7 @@ static bool __guc_rc_supported(struct intel_guc *guc)
 {
 	/* GuC RC is unavailable for pre-Gen12 */
 	return guc->submission_supported &&
-		GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12;
+		GRAPHICS_VER(guc_to_i915(guc)) >= 12;
 }
 
 static bool __guc_rc_selected(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 2dfb07cc4b33..3e681ab6fbf9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -34,7 +34,7 @@ static bool __detect_slpc_supported(struct intel_guc *guc)
 {
 	/* GuC SLPC is unavailable for pre-Gen12 */
 	return guc->submission_supported &&
-		GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12;
+		GRAPHICS_VER(guc_to_i915(guc)) >= 12;
 }
 
 static bool __guc_slpc_selected(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index d37698bd6b91..a259f1118c5a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1107,7 +1107,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
 			if (deregister)
 				guc_signal_context_fence(ce);
 			if (destroyed) {
-				intel_gt_pm_put_async(guc_to_gt(guc));
+				intel_gt_pm_put_async_untracked(guc_to_gt(guc));
 				release_guc_id(guc, ce);
 				__guc_context_destroy(ce);
 			}
@@ -1303,6 +1303,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 	unsigned long flags;
 	u32 reset_count;
 	bool in_reset;
+	intel_wakeref_t wakeref;
 
 	spin_lock_irqsave(&guc->timestamp.lock, flags);
 
@@ -1325,7 +1326,8 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 	 * start_gt_clk is derived from GuC state. To get a consistent
 	 * view of activity, we query the GuC state only if gt is awake.
 	 */
-	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
+	wakeref = in_reset ? 0 : intel_gt_pm_get_if_awake(gt);
+	if (wakeref) {
 		stats_saved = *stats;
 		gt_stamp_saved = guc->timestamp.gt_stamp;
 		/*
@@ -1334,7 +1336,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
 		 */
 		guc_update_engine_gt_clks(engine);
 		guc_update_pm_timestamp(guc, now);
-		intel_gt_pm_put_async(gt);
+		intel_gt_pm_put_async(gt, wakeref);
 		if (i915_reset_count(gpu_error) != reset_count) {
 			*stats = stats_saved;
 			guc->timestamp.gt_stamp = gt_stamp_saved;
@@ -3385,9 +3387,9 @@ static void destroyed_worker_func(struct work_struct *w)
 	struct intel_guc *guc = container_of(w, struct intel_guc,
 					     submission_state.destroyed_worker);
 	struct intel_gt *gt = guc_to_gt(guc);
-	int tmp;
+	intel_wakeref_t wakeref;
 
-	with_intel_gt_pm(gt, tmp)
+	with_intel_gt_pm(gt, wakeref)
 		deregister_destroyed_contexts(guc);
 }
 
@@ -4624,12 +4626,12 @@ static bool __guc_submission_supported(struct intel_guc *guc)
 {
 	/* GuC submission is unavailable for pre-Gen11 */
 	return intel_guc_is_supported(guc) &&
-	       GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
+	       GRAPHICS_VER(guc_to_i915(guc)) >= 11;
 }
 
 static bool __guc_submission_selected(struct intel_guc *guc)
 {
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	if (!intel_guc_submission_is_supported(guc))
 		return false;
@@ -4894,7 +4896,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
 		intel_context_put(ce);
 	} else if (context_destroyed(ce)) {
 		/* Context has been destroyed */
-		intel_gt_pm_put_async(guc_to_gt(guc));
+		intel_gt_pm_put_async_untracked(guc_to_gt(guc));
 		release_guc_id(guc, ce);
 		__guc_context_destroy(ce);
 	}
@@ -5001,7 +5003,8 @@ static void capture_error_state(struct intel_guc *guc,
 			if (match) {
 				intel_engine_set_hung_context(e, ce);
 				engine_mask |= e->mask;
-				atomic_inc(&i915->gpu_error.reset_engine_count[e->uabi_class]);
+				i915_increase_reset_engine_count(&i915->gpu_error,
+								 e);
 			}
 		}
 
@@ -5013,7 +5016,7 @@ static void capture_error_state(struct intel_guc *guc,
 	} else {
 		intel_engine_set_hung_context(ce->engine, ce);
 		engine_mask = ce->engine->mask;
-		atomic_inc(&i915->gpu_error.reset_engine_count[ce->engine->uabi_class]);
+		i915_increase_reset_engine_count(&i915->gpu_error, ce->engine);
 	}
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 27f6561dd731..3872d309ed31 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -106,11 +106,6 @@ static void __confirm_options(struct intel_uc *uc)
 		gt_info(gt,  "Incompatible option enable_guc=%d - %s\n",
 			i915->params.enable_guc, "GuC is not supported!");
 
-	if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC &&
-	    !intel_uc_supports_huc(uc))
-		gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
-			i915->params.enable_guc, "HuC is not supported!");
-
 	if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION &&
 	    !intel_uc_supports_guc_submission(uc))
 		gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 362639162ed6..756093eaf2ad 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -1343,16 +1343,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
 
 		for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) {
 			u32 len = min_t(u32, size, PAGE_SIZE - offset);
-			void *vaddr;
 
 			if (idx > 0) {
 				idx--;
 				continue;
 			}
 
-			vaddr = kmap_atomic(page);
-			memcpy(dst, vaddr + offset, len);
-			kunmap_atomic(vaddr);
+			memcpy_from_page(dst, page, offset, len);
 
 			offset = 0;
 			dst += len;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index bfb72143566f..c900aac85adb 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -286,11 +286,126 @@ err_wakeref:
 	return ret;
 }
 
+/*
+ * Send a context schedule H2G message with an invalid context id.
+ * This should generate a GUC_RESULT_INVALID_CONTEXT response.
+ */
+static int bad_h2g(struct intel_guc *guc)
+{
+	u32 action[] = {
+	   INTEL_GUC_ACTION_SCHED_CONTEXT,
+	   0x12345678,
+	};
+
+	return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0);
+}
+
+/*
+ * Set a spinner running to make sure the system is alive and active,
+ * then send a bad but asynchronous H2G command and wait to see if an
+ * error response is returned. If no response is received or if the
+ * spinner dies then the test will fail.
+ */
+#define FAST_RESPONSE_TIMEOUT_MS	1000
+static int intel_guc_fast_request(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_context *ce;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	intel_wakeref_t wakeref;
+	struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt);
+	bool spinning = false;
+	int ret = 0;
+
+	if (!engine)
+		return 0;
+
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce)) {
+		ret = PTR_ERR(ce);
+		gt_err(gt, "Failed to create spinner request: %pe\n", ce);
+		goto err_pm;
+	}
+
+	ret = igt_spinner_init(&spin, engine->gt);
+	if (ret) {
+		gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret));
+		goto err_pm;
+	}
+	spinning = true;
+
+	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+	intel_context_put(ce);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		gt_err(gt, "Failed to create spinner request: %pe\n", rq);
+		goto err_spin;
+	}
+
+	ret = request_add_spin(rq, &spin);
+	if (ret) {
+		gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+	gt->uc.guc.fast_response_selftest = 1;
+
+	ret = bad_h2g(&gt->uc.guc);
+	if (ret) {
+		gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+	ret = wait_for(gt->uc.guc.fast_response_selftest != 1 || i915_request_completed(rq),
+		       FAST_RESPONSE_TIMEOUT_MS);
+	if (ret) {
+		gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+	if (i915_request_completed(rq)) {
+		gt_err(gt, "Spinner died waiting for fast request error!\n");
+		ret = -EIO;
+		goto err_rq;
+	}
+
+	if (gt->uc.guc.fast_response_selftest != 2) {
+		gt_err(gt, "Unexpected fast response count: %d\n",
+		       gt->uc.guc.fast_response_selftest);
+		goto err_rq;
+	}
+
+	igt_spinner_end(&spin);
+	spinning = false;
+
+	ret = intel_selftest_wait_for_rq(rq);
+	if (ret) {
+		gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+err_rq:
+	i915_request_put(rq);
+
+err_spin:
+	if (spinning)
+		igt_spinner_end(&spin);
+	igt_spinner_fini(&spin);
+
+err_pm:
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+	return ret;
+}
+
 int intel_guc_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(intel_guc_scrub_ctbs),
 		SUBTEST(intel_guc_steal_guc_ids),
+		SUBTEST(intel_guc_fast_request),
 	};
 	struct intel_gt *gt = to_gt(i915);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
index 34b5d952e2bc..26fdc392fce6 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
@@ -74,7 +74,7 @@ static int intel_hang_guc(void *arg)
 		goto err;
 	}
 
-	rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
 	intel_context_put(ce);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index ddf49c2dbb91..2905df83e180 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1211,11 +1211,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		for (n = offset >> PAGE_SHIFT; remain; n++) {
 			int len = min(remain, PAGE_SIZE - x);
 
-			src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+			src = kmap_local_page(i915_gem_object_get_page(src_obj, n));
 			if (src_needs_clflush)
 				drm_clflush_virt_range(src + x, len);
 			memcpy(ptr, src + x, len);
-			kunmap_atomic(src);
+			kunmap_local(src);
 
 			ptr += len;
 			remain -= len;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index bfe92d2402ea..db99c2ef66db 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -51,6 +51,7 @@
 #include "i915_debugfs.h"
 #include "i915_debugfs_params.h"
 #include "i915_driver.h"
+#include "i915_gpu_error.h"
 #include "i915_irq.h"
 #include "i915_reg.h"
 #include "i915_scheduler.h"
@@ -299,107 +300,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	return 0;
 }
 
-#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
-			      size_t count, loff_t *pos)
-{
-	struct i915_gpu_coredump *error;
-	ssize_t ret;
-	void *buf;
-
-	error = file->private_data;
-	if (!error)
-		return 0;
-
-	/* Bounce buffer required because of kernfs __user API convenience. */
-	buf = kmalloc(count, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count);
-	if (ret <= 0)
-		goto out;
-
-	if (!copy_to_user(ubuf, buf, ret))
-		*pos += ret;
-	else
-		ret = -EFAULT;
-
-out:
-	kfree(buf);
-	return ret;
-}
-
-static int gpu_state_release(struct inode *inode, struct file *file)
-{
-	i915_gpu_coredump_put(file->private_data);
-	return 0;
-}
-
-static int i915_gpu_info_open(struct inode *inode, struct file *file)
-{
-	struct drm_i915_private *i915 = inode->i_private;
-	struct i915_gpu_coredump *gpu;
-	intel_wakeref_t wakeref;
-
-	gpu = NULL;
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
-		gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE);
-
-	if (IS_ERR(gpu))
-		return PTR_ERR(gpu);
-
-	file->private_data = gpu;
-	return 0;
-}
-
-static const struct file_operations i915_gpu_info_fops = {
-	.owner = THIS_MODULE,
-	.open = i915_gpu_info_open,
-	.read = gpu_state_read,
-	.llseek = default_llseek,
-	.release = gpu_state_release,
-};
-
-static ssize_t
-i915_error_state_write(struct file *filp,
-		       const char __user *ubuf,
-		       size_t cnt,
-		       loff_t *ppos)
-{
-	struct i915_gpu_coredump *error = filp->private_data;
-
-	if (!error)
-		return 0;
-
-	drm_dbg(&error->i915->drm, "Resetting error state\n");
-	i915_reset_error_state(error->i915);
-
-	return cnt;
-}
-
-static int i915_error_state_open(struct inode *inode, struct file *file)
-{
-	struct i915_gpu_coredump *error;
-
-	error = i915_first_error_state(inode->i_private);
-	if (IS_ERR(error))
-		return PTR_ERR(error);
-
-	file->private_data  = error;
-	return 0;
-}
-
-static const struct file_operations i915_error_state_fops = {
-	.owner = THIS_MODULE,
-	.open = i915_error_state_open,
-	.read = gpu_state_read,
-	.write = i915_error_state_write,
-	.llseek = default_llseek,
-	.release = gpu_state_release,
-};
-#endif
-
 static int i915_frequency_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
@@ -839,10 +739,6 @@ static const struct i915_debugfs_files {
 	{"i915_perf_noa_delay", &i915_perf_noa_delay_fops},
 	{"i915_wedged", &i915_wedged_fops},
 	{"i915_gem_drop_caches", &i915_drop_caches_fops},
-#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-	{"i915_error_state", &i915_error_state_fops},
-	{"i915_gpu_info", &i915_gpu_info_fops},
-#endif
 };
 
 void i915_debugfs_register(struct drm_i915_private *dev_priv)
@@ -865,4 +761,6 @@ void i915_debugfs_register(struct drm_i915_private *dev_priv)
 	drm_debugfs_create_files(i915_debugfs_list,
 				 ARRAY_SIZE(i915_debugfs_list),
 				 minor->debugfs_root, minor);
+
+	i915_gpu_error_debugfs_register(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 2a1faf403965..c7d7c3b7ecc6 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -798,7 +798,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ret)
 		goto out_cleanup_modeset2;
 
-	intel_pxp_init(i915);
+	ret = intel_pxp_init(i915);
+	if (ret != -ENODEV)
+		drm_dbg(&i915->drm, "pxp init failed with %d\n", ret);
 
 	ret = intel_display_driver_probe(i915);
 	if (ret)
@@ -1033,7 +1035,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915)
 	intel_power_domains_driver_remove(i915);
 	enable_rpm_wakeref_asserts(&i915->runtime_pm);
 
-	intel_runtime_pm_driver_release(&i915->runtime_pm);
+	intel_runtime_pm_driver_last_release(&i915->runtime_pm);
 }
 
 static bool suspend_to_idle(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 2a44b3876cb5..fa6852713bee 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -28,6 +28,10 @@ struct i915_drm_client *i915_drm_client_alloc(void)
 	kref_init(&client->kref);
 	spin_lock_init(&client->ctx_lock);
 	INIT_LIST_HEAD(&client->ctx_list);
+#ifdef CONFIG_PROC_FS
+	spin_lock_init(&client->objects_lock);
+	INIT_LIST_HEAD(&client->objects_list);
+#endif
 
 	return client;
 }
@@ -41,6 +45,68 @@ void __i915_drm_client_free(struct kref *kref)
 }
 
 #ifdef CONFIG_PROC_FS
+static void
+obj_meminfo(struct drm_i915_gem_object *obj,
+	    struct drm_memory_stats stats[INTEL_REGION_UNKNOWN])
+{
+	const enum intel_region_id id = obj->mm.region ?
+					obj->mm.region->id : INTEL_REGION_SMEM;
+	const u64 sz = obj->base.size;
+
+	if (obj->base.handle_count > 1)
+		stats[id].shared += sz;
+	else
+		stats[id].private += sz;
+
+	if (i915_gem_object_has_pages(obj)) {
+		stats[id].resident += sz;
+
+		if (!dma_resv_test_signaled(obj->base.resv,
+					    DMA_RESV_USAGE_BOOKKEEP))
+			stats[id].active += sz;
+		else if (i915_gem_object_is_shrinkable(obj) &&
+			 obj->mm.madv == I915_MADV_DONTNEED)
+			stats[id].purgeable += sz;
+	}
+}
+
+static void show_meminfo(struct drm_printer *p, struct drm_file *file)
+{
+	struct drm_memory_stats stats[INTEL_REGION_UNKNOWN] = {};
+	struct drm_i915_file_private *fpriv = file->driver_priv;
+	struct i915_drm_client *client = fpriv->client;
+	struct drm_i915_private *i915 = fpriv->i915;
+	struct drm_i915_gem_object *obj;
+	struct intel_memory_region *mr;
+	struct list_head __rcu *pos;
+	unsigned int id;
+
+	/* Public objects. */
+	spin_lock(&file->table_lock);
+	idr_for_each_entry(&file->object_idr, obj, id)
+		obj_meminfo(obj, stats);
+	spin_unlock(&file->table_lock);
+
+	/* Internal objects. */
+	rcu_read_lock();
+	list_for_each_rcu(pos, &client->objects_list) {
+		obj = i915_gem_object_get_rcu(list_entry(pos, typeof(*obj),
+							 client_link));
+		if (!obj)
+			continue;
+		obj_meminfo(obj, stats);
+		i915_gem_object_put(obj);
+	}
+	rcu_read_unlock();
+
+	for_each_memory_region(mr, i915, id)
+		drm_print_memory_stats(p,
+				       &stats[id],
+				       DRM_GEM_OBJECT_RESIDENT |
+				       DRM_GEM_OBJECT_PURGEABLE,
+				       mr->uabi_name);
+}
+
 static const char * const uabi_class_names[] = {
 	[I915_ENGINE_CLASS_RENDER] = "render",
 	[I915_ENGINE_CLASS_COPY] = "copy",
@@ -102,10 +168,52 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
 	 * ******************************************************************
 	 */
 
+	show_meminfo(p, file);
+
 	if (GRAPHICS_VER(i915) < 8)
 		return;
 
 	for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
 		show_client_class(p, i915, file_priv->client, i);
 }
+
+void i915_drm_client_add_object(struct i915_drm_client *client,
+				struct drm_i915_gem_object *obj)
+{
+	unsigned long flags;
+
+	GEM_WARN_ON(obj->client);
+	GEM_WARN_ON(!list_empty(&obj->client_link));
+
+	spin_lock_irqsave(&client->objects_lock, flags);
+	obj->client = i915_drm_client_get(client);
+	list_add_tail_rcu(&obj->client_link, &client->objects_list);
+	spin_unlock_irqrestore(&client->objects_lock, flags);
+}
+
+void i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
+{
+	struct i915_drm_client *client = fetch_and_zero(&obj->client);
+	unsigned long flags;
+
+	/* Object may not be associated with a client. */
+	if (!client)
+		return;
+
+	spin_lock_irqsave(&client->objects_lock, flags);
+	list_del_rcu(&obj->client_link);
+	spin_unlock_irqrestore(&client->objects_lock, flags);
+
+	i915_drm_client_put(client);
+}
+
+void i915_drm_client_add_context_objects(struct i915_drm_client *client,
+					 struct intel_context *ce)
+{
+	if (ce->state)
+		i915_drm_client_add_object(client, ce->state->obj);
+
+	if (ce->ring != ce->engine->legacy.ring && ce->ring->vma)
+		i915_drm_client_add_object(client, ce->ring->vma->obj);
+}
 #endif
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 67816c912bca..a439dd789936 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -12,6 +12,10 @@
 
 #include <uapi/drm/i915_drm.h>
 
+#include "i915_file_private.h"
+#include "gem/i915_gem_object_types.h"
+#include "gt/intel_context_types.h"
+
 #define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
 
 struct drm_file;
@@ -25,6 +29,20 @@ struct i915_drm_client {
 	spinlock_t ctx_lock; /* For add/remove from ctx_list. */
 	struct list_head ctx_list; /* List of contexts belonging to client. */
 
+#ifdef CONFIG_PROC_FS
+	/**
+	 * @objects_lock: lock protecting @objects_list
+	 */
+	spinlock_t objects_lock;
+
+	/**
+	 * @objects_list: list of objects created by this client
+	 *
+	 * Protected by @objects_lock.
+	 */
+	struct list_head objects_list;
+#endif
+
 	/**
 	 * @past_runtime: Accumulation of pphwsp runtimes from closed contexts.
 	 */
@@ -49,4 +67,28 @@ struct i915_drm_client *i915_drm_client_alloc(void);
 
 void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
 
+#ifdef CONFIG_PROC_FS
+void i915_drm_client_add_object(struct i915_drm_client *client,
+				struct drm_i915_gem_object *obj);
+void i915_drm_client_remove_object(struct drm_i915_gem_object *obj);
+void i915_drm_client_add_context_objects(struct i915_drm_client *client,
+					 struct intel_context *ce);
+#else
+static inline void i915_drm_client_add_object(struct i915_drm_client *client,
+					      struct drm_i915_gem_object *obj)
+{
+}
+
+static inline void
+i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
+{
+}
+
+static inline void
+i915_drm_client_add_context_objects(struct i915_drm_client *client,
+				    struct intel_context *ce)
+{
+}
+#endif
+
 #endif /* !__I915_DRM_CLIENT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 0971f4976324..d04660b60046 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -57,6 +57,7 @@
 #include "i915_memcpy.h"
 #include "i915_reg.h"
 #include "i915_scatterlist.h"
+#include "i915_sysfs.h"
 #include "i915_utils.h"
 
 #define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@ -520,7 +521,7 @@ __find_vma(struct i915_vma_coredump *vma, const char *name)
 	return NULL;
 }
 
-struct i915_vma_coredump *
+static struct i915_vma_coredump *
 intel_gpu_error_find_batch(const struct intel_engine_coredump *ee)
 {
 	return __find_vma(ee->vma, "batch");
@@ -609,9 +610,9 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 	va_end(args);
 }
 
-void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
-			       const struct intel_engine_cs *engine,
-			       const struct i915_vma_coredump *vma)
+static void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
+				      const struct intel_engine_cs *engine,
+				      const struct i915_vma_coredump *vma)
 {
 	char out[ASCII85_BUFSZ];
 	struct page *page;
@@ -2140,7 +2141,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du
 	return error;
 }
 
-struct i915_gpu_coredump *
+static struct i915_gpu_coredump *
 i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
 {
 	static DEFINE_MUTEX(capture_mutex);
@@ -2211,7 +2212,7 @@ void i915_capture_error_state(struct intel_gt *gt,
 	i915_gpu_coredump_put(error);
 }
 
-struct i915_gpu_coredump *
+static struct i915_gpu_coredump *
 i915_first_error_state(struct drm_i915_private *i915)
 {
 	struct i915_gpu_coredump *error;
@@ -2378,3 +2379,184 @@ void intel_klog_error_capture(struct intel_gt *gt,
 	drm_info(&i915->drm, "[Capture/%d.%d] Dumped %zd bytes\n", l_count, line++, pos_err);
 }
 #endif
+
+static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
+			      size_t count, loff_t *pos)
+{
+	struct i915_gpu_coredump *error;
+	ssize_t ret;
+	void *buf;
+
+	error = file->private_data;
+	if (!error)
+		return 0;
+
+	/* Bounce buffer required because of kernfs __user API convenience. */
+	buf = kmalloc(count, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count);
+	if (ret <= 0)
+		goto out;
+
+	if (!copy_to_user(ubuf, buf, ret))
+		*pos += ret;
+	else
+		ret = -EFAULT;
+
+out:
+	kfree(buf);
+	return ret;
+}
+
+static int gpu_state_release(struct inode *inode, struct file *file)
+{
+	i915_gpu_coredump_put(file->private_data);
+	return 0;
+}
+
+static int i915_gpu_info_open(struct inode *inode, struct file *file)
+{
+	struct drm_i915_private *i915 = inode->i_private;
+	struct i915_gpu_coredump *gpu;
+	intel_wakeref_t wakeref;
+
+	gpu = NULL;
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+		gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE);
+
+	if (IS_ERR(gpu))
+		return PTR_ERR(gpu);
+
+	file->private_data = gpu;
+	return 0;
+}
+
+static const struct file_operations i915_gpu_info_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_gpu_info_open,
+	.read = gpu_state_read,
+	.llseek = default_llseek,
+	.release = gpu_state_release,
+};
+
+static ssize_t
+i915_error_state_write(struct file *filp,
+		       const char __user *ubuf,
+		       size_t cnt,
+		       loff_t *ppos)
+{
+	struct i915_gpu_coredump *error = filp->private_data;
+
+	if (!error)
+		return 0;
+
+	drm_dbg(&error->i915->drm, "Resetting error state\n");
+	i915_reset_error_state(error->i915);
+
+	return cnt;
+}
+
+static int i915_error_state_open(struct inode *inode, struct file *file)
+{
+	struct i915_gpu_coredump *error;
+
+	error = i915_first_error_state(inode->i_private);
+	if (IS_ERR(error))
+		return PTR_ERR(error);
+
+	file->private_data  = error;
+	return 0;
+}
+
+static const struct file_operations i915_error_state_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_error_state_open,
+	.read = gpu_state_read,
+	.write = i915_error_state_write,
+	.llseek = default_llseek,
+	.release = gpu_state_release,
+};
+
+void i915_gpu_error_debugfs_register(struct drm_i915_private *i915)
+{
+	struct drm_minor *minor = i915->drm.primary;
+
+	debugfs_create_file("i915_error_state", 0644, minor->debugfs_root, i915,
+			    &i915_error_state_fops);
+	debugfs_create_file("i915_gpu_info", 0644, minor->debugfs_root, i915,
+			    &i915_gpu_info_fops);
+}
+
+static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr, char *buf,
+				loff_t off, size_t count)
+{
+
+	struct device *kdev = kobj_to_dev(kobj);
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
+	struct i915_gpu_coredump *gpu;
+	ssize_t ret = 0;
+
+	/*
+	 * FIXME: Concurrent clients triggering resets and reading + clearing
+	 * dumps can cause inconsistent sysfs reads when a user calls in with a
+	 * non-zero offset to complete a prior partial read but the
+	 * gpu_coredump has been cleared or replaced.
+	 */
+
+	gpu = i915_first_error_state(i915);
+	if (IS_ERR(gpu)) {
+		ret = PTR_ERR(gpu);
+	} else if (gpu) {
+		ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
+		i915_gpu_coredump_put(gpu);
+	} else {
+		const char *str = "No error state collected\n";
+		size_t len = strlen(str);
+
+		if (off < len) {
+			ret = min_t(size_t, count, len - off);
+			memcpy(buf, str + off, ret);
+		}
+	}
+
+	return ret;
+}
+
+static ssize_t error_state_write(struct file *file, struct kobject *kobj,
+				 struct bin_attribute *attr, char *buf,
+				 loff_t off, size_t count)
+{
+	struct device *kdev = kobj_to_dev(kobj);
+	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+
+	drm_dbg(&dev_priv->drm, "Resetting error state\n");
+	i915_reset_error_state(dev_priv);
+
+	return count;
+}
+
+static const struct bin_attribute error_state_attr = {
+	.attr.name = "error",
+	.attr.mode = S_IRUSR | S_IWUSR,
+	.size = 0,
+	.read = error_state_read,
+	.write = error_state_write,
+};
+
+void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915)
+{
+	struct device *kdev = i915->drm.primary->kdev;
+
+	if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr))
+		drm_err(&i915->drm, "error_state sysfs setup failed\n");
+}
+
+void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915)
+{
+	struct device *kdev = i915->drm.primary->kdev;
+
+	sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 4ce227f7e1e1..7c255bb1c319 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -17,6 +17,7 @@
 #include "display/intel_display_device.h"
 #include "display/intel_display_params.h"
 #include "gt/intel_engine.h"
+#include "gt/intel_engine_types.h"
 #include "gt/intel_gt_types.h"
 #include "gt/uc/intel_uc_fw.h"
 
@@ -234,7 +235,7 @@ struct i915_gpu_error {
 	atomic_t reset_count;
 
 	/** Number of times an engine has been reset */
-	atomic_t reset_engine_count[I915_NUM_ENGINES];
+	atomic_t reset_engine_count[MAX_ENGINE_CLASS];
 };
 
 struct drm_i915_error_state_buf {
@@ -257,7 +258,14 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
 					  const struct intel_engine_cs *engine)
 {
-	return atomic_read(&error->reset_engine_count[engine->uabi_class]);
+	return atomic_read(&error->reset_engine_count[engine->class]);
+}
+
+static inline void
+i915_increase_reset_engine_count(struct i915_gpu_error *error,
+				 const struct intel_engine_cs *engine)
+{
+	atomic_inc(&error->reset_engine_count[engine->class]);
 }
 
 #define CORE_DUMP_FLAG_NONE           0x0
@@ -277,14 +285,7 @@ static inline void intel_klog_error_capture(struct intel_gt *gt,
 
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
-void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
-			       const struct intel_engine_cs *engine,
-			       const struct i915_vma_coredump *vma);
-struct i915_vma_coredump *
-intel_gpu_error_find_batch(const struct intel_engine_coredump *ee);
-
-struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
-					    intel_engine_mask_t engine_mask, u32 dump_flags);
+
 void i915_capture_error_state(struct intel_gt *gt,
 			      intel_engine_mask_t engine_mask, u32 dump_flags);
 
@@ -332,10 +333,13 @@ static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
 		kref_put(&gpu->ref, __i915_gpu_coredump_free);
 }
 
-struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
 void i915_reset_error_state(struct drm_i915_private *i915);
 void i915_disable_error_state(struct drm_i915_private *i915, int err);
 
+void i915_gpu_error_debugfs_register(struct drm_i915_private *i915);
+void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915);
+void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915);
+
 #else
 
 __printf(2, 3)
@@ -403,12 +407,6 @@ static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
 {
 }
 
-static inline struct i915_gpu_coredump *
-i915_first_error_state(struct drm_i915_private *i915)
-{
-	return ERR_PTR(-ENODEV);
-}
-
 static inline void i915_reset_error_state(struct drm_i915_private *i915)
 {
 }
@@ -418,6 +416,18 @@ static inline void i915_disable_error_state(struct drm_i915_private *i915,
 {
 }
 
+static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915)
+{
+}
+
+static inline void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915)
+{
+}
+
+static inline void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915)
+{
+}
+
 #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
 
 #endif /* _I915_GPU_ERROR_H_ */
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
index 975da8e7f2a9..8c3f443c8347 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -175,7 +175,7 @@ hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
 	 *     tau4 = (4 | x) << y
 	 * but add 2 when doing the final right shift to account for units
 	 */
-	tau4 = ((1 << x_w) | x) << y;
+	tau4 = (u64)((1 << x_w) | x) << y;
 	/* val in hwmon interface units (millisec) */
 	out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
 
@@ -211,7 +211,7 @@ hwm_power1_max_interval_store(struct device *dev,
 	r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
 	x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
 	y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
-	tau4 = ((1 << x_w) | x) << y;
+	tau4 = (u64)((1 << x_w) | x) << y;
 	max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
 
 	if (val > max_win)
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index 13b1ae9b96c7..46445248d193 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -291,7 +291,8 @@ struct i915_perf_stream {
 		int size_exponent;
 
 		/**
-		 * @ptr_lock: Locks reads and writes to all head/tail state
+		 * @oa_buffer.ptr_lock: Locks reads and writes to all
+		 * head/tail state
 		 *
 		 * Consider: the head and tail pointer state needs to be read
 		 * consistently from a hrtimer callback (atomic context) and
@@ -313,7 +314,8 @@ struct i915_perf_stream {
 		spinlock_t ptr_lock;
 
 		/**
-		 * @head: Although we can always read back the head pointer register,
+		 * @oa_buffer.head: Although we can always read back
+		 * the head pointer register,
 		 * we prefer to avoid trusting the HW state, just to avoid any
 		 * risk that some hardware condition could * somehow bump the
 		 * head pointer unpredictably and cause us to forward the wrong
@@ -322,7 +324,8 @@ struct i915_perf_stream {
 		u32 head;
 
 		/**
-		 * @tail: The last verified tail that can be read by userspace.
+		 * @oa_buffer.tail: The last verified tail that can be
+		 * read by userspace.
 		 */
 		u32 tail;
 	} oa_buffer;
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index f861863eb7c1..21eb0c5b320d 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -31,6 +31,16 @@
 static cpumask_t i915_pmu_cpumask;
 static unsigned int i915_pmu_target_cpu = -1;
 
+static struct i915_pmu *event_to_pmu(struct perf_event *event)
+{
+	return container_of(event->pmu, struct i915_pmu, base);
+}
+
+static struct drm_i915_private *pmu_to_i915(struct i915_pmu *pmu)
+{
+	return container_of(pmu, struct drm_i915_private, pmu);
+}
+
 static u8 engine_config_sample(u64 config)
 {
 	return config & I915_PMU_SAMPLE_MASK;
@@ -141,7 +151,7 @@ static u32 frequency_enabled_mask(void)
 
 static bool pmu_needs_timer(struct i915_pmu *pmu)
 {
-	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	u32 enable;
 
 	/*
@@ -213,19 +223,19 @@ static u64 get_rc6(struct intel_gt *gt)
 	struct drm_i915_private *i915 = gt->i915;
 	const unsigned int gt_id = gt->info.id;
 	struct i915_pmu *pmu = &i915->pmu;
+	intel_wakeref_t wakeref;
 	unsigned long flags;
-	bool awake = false;
 	u64 val;
 
-	if (intel_gt_pm_get_if_awake(gt)) {
+	wakeref = intel_gt_pm_get_if_awake(gt);
+	if (wakeref) {
 		val = __get_rc6(gt);
-		intel_gt_pm_put_async(gt);
-		awake = true;
+		intel_gt_pm_put_async(gt, wakeref);
 	}
 
 	spin_lock_irqsave(&pmu->lock, flags);
 
-	if (awake) {
+	if (wakeref) {
 		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
 	} else {
 		/*
@@ -251,7 +261,7 @@ static u64 get_rc6(struct intel_gt *gt)
 
 static void init_rc6(struct i915_pmu *pmu)
 {
-	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	struct intel_gt *gt;
 	unsigned int i;
 
@@ -429,12 +439,14 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
 	const unsigned int gt_id = gt->info.id;
 	struct i915_pmu *pmu = &i915->pmu;
 	struct intel_rps *rps = &gt->rps;
+	intel_wakeref_t wakeref;
 
 	if (!frequency_sampling_enabled(pmu, gt_id))
 		return;
 
 	/* Report 0/0 (actual/requested) frequency while parked. */
-	if (!intel_gt_pm_get_if_awake(gt))
+	wakeref = intel_gt_pm_get_if_awake(gt);
+	if (!wakeref)
 		return;
 
 	if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) {
@@ -463,14 +475,13 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
 				period_ns / 1000);
 	}
 
-	intel_gt_pm_put_async(gt);
+	intel_gt_pm_put_async(gt, wakeref);
 }
 
 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 {
-	struct drm_i915_private *i915 =
-		container_of(hrtimer, struct drm_i915_private, pmu.timer);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = container_of(hrtimer, struct i915_pmu, timer);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	unsigned int period_ns;
 	struct intel_gt *gt;
 	unsigned int i;
@@ -505,8 +516,8 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 
 static void i915_pmu_event_destroy(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 
 	drm_WARN_ON(&i915->drm, event->parent);
 
@@ -572,8 +583,8 @@ config_status(struct drm_i915_private *i915, u64 config)
 
 static int engine_event_init(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	struct intel_engine_cs *engine;
 
 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
@@ -586,9 +597,8 @@ static int engine_event_init(struct perf_event *event)
 
 static int i915_pmu_event_init(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	int ret;
 
 	if (pmu->closed)
@@ -628,9 +638,8 @@ static int i915_pmu_event_init(struct perf_event *event)
 
 static u64 __i915_pmu_event_read(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	u64 val = 0;
 
 	if (is_engine_event(event)) {
@@ -686,10 +695,8 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 
 static void i915_pmu_event_read(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
 	struct hw_perf_event *hwc = &event->hw;
-	struct i915_pmu *pmu = &i915->pmu;
 	u64 prev, new;
 
 	if (pmu->closed) {
@@ -707,10 +714,9 @@ static void i915_pmu_event_read(struct perf_event *event)
 
 static void i915_pmu_enable(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	const unsigned int bit = event_bit(event);
-	struct i915_pmu *pmu = &i915->pmu;
 	unsigned long flags;
 
 	if (bit == -1)
@@ -771,10 +777,9 @@ update:
 
 static void i915_pmu_disable(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = event_to_pmu(event);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	const unsigned int bit = event_bit(event);
-	struct i915_pmu *pmu = &i915->pmu;
 	unsigned long flags;
 
 	if (bit == -1)
@@ -818,9 +823,7 @@ static void i915_pmu_disable(struct perf_event *event)
 
 static void i915_pmu_event_start(struct perf_event *event, int flags)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = event_to_pmu(event);
 
 	if (pmu->closed)
 		return;
@@ -848,9 +851,7 @@ out:
 
 static int i915_pmu_event_add(struct perf_event *event, int flags)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
-	struct i915_pmu *pmu = &i915->pmu;
+	struct i915_pmu *pmu = event_to_pmu(event);
 
 	if (pmu->closed)
 		return -ENODEV;
@@ -982,7 +983,7 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
 static struct attribute **
 create_event_attributes(struct i915_pmu *pmu)
 {
-	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
+	struct drm_i915_private *i915 = pmu_to_i915(pmu);
 	static const struct {
 		unsigned int counter;
 		const char *name;
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index e88bb4f04305..613decd47760 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -155,81 +155,6 @@ static const struct bin_attribute dpf_attrs_1 = {
 	.private = (void *)1
 };
 
-#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-
-static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
-				struct bin_attribute *attr, char *buf,
-				loff_t off, size_t count)
-{
-
-	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
-	struct i915_gpu_coredump *gpu;
-	ssize_t ret = 0;
-
-	/*
-	 * FIXME: Concurrent clients triggering resets and reading + clearing
-	 * dumps can cause inconsistent sysfs reads when a user calls in with a
-	 * non-zero offset to complete a prior partial read but the
-	 * gpu_coredump has been cleared or replaced.
-	 */
-
-	gpu = i915_first_error_state(i915);
-	if (IS_ERR(gpu)) {
-		ret = PTR_ERR(gpu);
-	} else if (gpu) {
-		ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
-		i915_gpu_coredump_put(gpu);
-	} else {
-		const char *str = "No error state collected\n";
-		size_t len = strlen(str);
-
-		if (off < len) {
-			ret = min_t(size_t, count, len - off);
-			memcpy(buf, str + off, ret);
-		}
-	}
-
-	return ret;
-}
-
-static ssize_t error_state_write(struct file *file, struct kobject *kobj,
-				 struct bin_attribute *attr, char *buf,
-				 loff_t off, size_t count)
-{
-	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-
-	drm_dbg(&dev_priv->drm, "Resetting error state\n");
-	i915_reset_error_state(dev_priv);
-
-	return count;
-}
-
-static const struct bin_attribute error_state_attr = {
-	.attr.name = "error",
-	.attr.mode = S_IRUSR | S_IWUSR,
-	.size = 0,
-	.read = error_state_read,
-	.write = error_state_write,
-};
-
-static void i915_setup_error_capture(struct device *kdev)
-{
-	if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr))
-		drm_err(&kdev_minor_to_i915(kdev)->drm,
-			"error_state sysfs setup failed\n");
-}
-
-static void i915_teardown_error_capture(struct device *kdev)
-{
-	sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
-}
-#else
-static void i915_setup_error_capture(struct device *kdev) {}
-static void i915_teardown_error_capture(struct device *kdev) {}
-#endif
-
 void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 {
 	struct device *kdev = dev_priv->drm.primary->kdev;
@@ -255,7 +180,7 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 		drm_warn(&dev_priv->drm,
 			 "failed to register GT sysfs directory\n");
 
-	i915_setup_error_capture(kdev);
+	i915_gpu_error_sysfs_setup(dev_priv);
 
 	intel_engines_add_sysfs(dev_priv);
 }
@@ -264,7 +189,7 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 {
 	struct device *kdev = dev_priv->drm.primary->kdev;
 
-	i915_teardown_error_capture(kdev);
+	i915_gpu_error_sysfs_teardown(dev_priv);
 
 	device_remove_bin_file(kdev,  &dpf_attrs_1);
 	device_remove_bin_file(kdev,  &dpf_attrs);
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
index 3d1fdea9811d..60a03340bbd4 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -216,6 +216,22 @@ static int intel_memory_region_memtest(struct intel_memory_region *mem,
 	return err;
 }
 
+static const char *region_type_str(u16 type)
+{
+	switch (type) {
+	case INTEL_MEMORY_SYSTEM:
+		return "system";
+	case INTEL_MEMORY_LOCAL:
+		return "local";
+	case INTEL_MEMORY_STOLEN_LOCAL:
+		return "stolen-local";
+	case INTEL_MEMORY_STOLEN_SYSTEM:
+		return "stolen-system";
+	default:
+		return "unknown";
+	}
+}
+
 struct intel_memory_region *
 intel_memory_region_create(struct drm_i915_private *i915,
 			   resource_size_t start,
@@ -244,6 +260,9 @@ intel_memory_region_create(struct drm_i915_private *i915,
 	mem->type = type;
 	mem->instance = instance;
 
+	snprintf(mem->uabi_name, sizeof(mem->uabi_name), "%s%u",
+		 region_type_str(type), instance);
+
 	mutex_init(&mem->objects.lock);
 	INIT_LIST_HEAD(&mem->objects.list);
 
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
index 2953ed5c3248..9ba36454e51b 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -80,6 +80,7 @@ struct intel_memory_region {
 	u16 instance;
 	enum intel_region_id id;
 	char name[16];
+	char uabi_name[16];
 	bool private; /* not for userspace */
 
 	struct {
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 8743153fad87..860b51b56a92 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -50,184 +50,44 @@
  * present for a given platform.
  */
 
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
-
-#include <linux/sort.h>
-
-#define STACKDEPTH 8
-
-static noinline depot_stack_handle_t __save_depot_stack(void)
+static struct drm_i915_private *rpm_to_i915(struct intel_runtime_pm *rpm)
 {
-	unsigned long entries[STACKDEPTH];
-	unsigned int n;
-
-	n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
-	return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
+	return container_of(rpm, struct drm_i915_private, runtime_pm);
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+
 static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
-	spin_lock_init(&rpm->debug.lock);
-	stack_depot_init();
+	ref_tracker_dir_init(&rpm->debug, INTEL_REFTRACK_DEAD_COUNT, dev_name(rpm->kdev));
 }
 
-static noinline depot_stack_handle_t
+static intel_wakeref_t
 track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
-	depot_stack_handle_t stack, *stacks;
-	unsigned long flags;
-
-	if (rpm->no_wakeref_tracking)
+	if (!rpm->available || rpm->no_wakeref_tracking)
 		return -1;
 
-	stack = __save_depot_stack();
-	if (!stack)
-		return -1;
-
-	spin_lock_irqsave(&rpm->debug.lock, flags);
-
-	if (!rpm->debug.count)
-		rpm->debug.last_acquire = stack;
-
-	stacks = krealloc(rpm->debug.owners,
-			  (rpm->debug.count + 1) * sizeof(*stacks),
-			  GFP_NOWAIT | __GFP_NOWARN);
-	if (stacks) {
-		stacks[rpm->debug.count++] = stack;
-		rpm->debug.owners = stacks;
-	} else {
-		stack = -1;
-	}
-
-	spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
-	return stack;
+	return intel_ref_tracker_alloc(&rpm->debug);
 }
 
 static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
-					     depot_stack_handle_t stack)
+					     intel_wakeref_t wakeref)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
-	unsigned long flags, n;
-	bool found = false;
-
-	if (unlikely(stack == -1))
+	if (!rpm->available || rpm->no_wakeref_tracking)
 		return;
 
-	spin_lock_irqsave(&rpm->debug.lock, flags);
-	for (n = rpm->debug.count; n--; ) {
-		if (rpm->debug.owners[n] == stack) {
-			memmove(rpm->debug.owners + n,
-				rpm->debug.owners + n + 1,
-				(--rpm->debug.count - n) * sizeof(stack));
-			found = true;
-			break;
-		}
-	}
-	spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
-	if (drm_WARN(&i915->drm, !found,
-		     "Unmatched wakeref (tracking %lu), count %u\n",
-		     rpm->debug.count, atomic_read(&rpm->wakeref_count))) {
-		char *buf;
-
-		buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
-		if (!buf)
-			return;
-
-		stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
-		DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
-
-		stack = READ_ONCE(rpm->debug.last_release);
-		if (stack) {
-			stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
-			DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf);
-		}
-
-		kfree(buf);
-	}
+	intel_ref_tracker_free(&rpm->debug, wakeref);
 }
 
-static int cmphandle(const void *_a, const void *_b)
+static void untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
 {
-	const depot_stack_handle_t * const a = _a, * const b = _b;
-
-	if (*a < *b)
-		return -1;
-	else if (*a > *b)
-		return 1;
-	else
-		return 0;
-}
-
-static void
-__print_intel_runtime_pm_wakeref(struct drm_printer *p,
-				 const struct intel_runtime_pm_debug *dbg)
-{
-	unsigned long i;
-	char *buf;
-
-	buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
-	if (!buf)
-		return;
-
-	if (dbg->last_acquire) {
-		stack_depot_snprint(dbg->last_acquire, buf, PAGE_SIZE, 2);
-		drm_printf(p, "Wakeref last acquired:\n%s", buf);
-	}
-
-	if (dbg->last_release) {
-		stack_depot_snprint(dbg->last_release, buf, PAGE_SIZE, 2);
-		drm_printf(p, "Wakeref last released:\n%s", buf);
-	}
-
-	drm_printf(p, "Wakeref count: %lu\n", dbg->count);
-
-	sort(dbg->owners, dbg->count, sizeof(*dbg->owners), cmphandle, NULL);
-
-	for (i = 0; i < dbg->count; i++) {
-		depot_stack_handle_t stack = dbg->owners[i];
-		unsigned long rep;
-
-		rep = 1;
-		while (i + 1 < dbg->count && dbg->owners[i + 1] == stack)
-			rep++, i++;
-		stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
-		drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
-	}
-
-	kfree(buf);
-}
-
-static noinline void
-__untrack_all_wakerefs(struct intel_runtime_pm_debug *debug,
-		       struct intel_runtime_pm_debug *saved)
-{
-	*saved = *debug;
-
-	debug->owners = NULL;
-	debug->count = 0;
-	debug->last_release = __save_depot_stack();
-}
-
-static void
-dump_and_free_wakeref_tracking(struct intel_runtime_pm_debug *debug)
-{
-	if (debug->count) {
-		struct drm_printer p = drm_debug_printer("i915");
-
-		__print_intel_runtime_pm_wakeref(&p, debug);
-	}
-
-	kfree(debug->owners);
+	ref_tracker_dir_exit(&rpm->debug);
 }
 
 static noinline void
 __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
 {
-	struct intel_runtime_pm_debug dbg = {};
 	unsigned long flags;
 
 	if (!atomic_dec_and_lock_irqsave(&rpm->wakeref_count,
@@ -235,60 +95,14 @@ __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
 					 flags))
 		return;
 
-	__untrack_all_wakerefs(&rpm->debug, &dbg);
+	ref_tracker_dir_print_locked(&rpm->debug, INTEL_REFTRACK_PRINT_LIMIT);
 	spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
-	dump_and_free_wakeref_tracking(&dbg);
-}
-
-static noinline void
-untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
-{
-	struct intel_runtime_pm_debug dbg = {};
-	unsigned long flags;
-
-	spin_lock_irqsave(&rpm->debug.lock, flags);
-	__untrack_all_wakerefs(&rpm->debug, &dbg);
-	spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
-	dump_and_free_wakeref_tracking(&dbg);
 }
 
 void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
 				    struct drm_printer *p)
 {
-	struct intel_runtime_pm_debug dbg = {};
-
-	do {
-		unsigned long alloc = dbg.count;
-		depot_stack_handle_t *s;
-
-		spin_lock_irq(&rpm->debug.lock);
-		dbg.count = rpm->debug.count;
-		if (dbg.count <= alloc) {
-			memcpy(dbg.owners,
-			       rpm->debug.owners,
-			       dbg.count * sizeof(*s));
-		}
-		dbg.last_acquire = rpm->debug.last_acquire;
-		dbg.last_release = rpm->debug.last_release;
-		spin_unlock_irq(&rpm->debug.lock);
-		if (dbg.count <= alloc)
-			break;
-
-		s = krealloc(dbg.owners,
-			     dbg.count * sizeof(*s),
-			     GFP_NOWAIT | __GFP_NOWARN);
-		if (!s)
-			goto out;
-
-		dbg.owners = s;
-	} while (1);
-
-	__print_intel_runtime_pm_wakeref(p, &dbg);
-
-out:
-	kfree(dbg.owners);
+	intel_ref_tracker_show(&rpm->debug, p);
 }
 
 #else
@@ -297,14 +111,14 @@ static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
 }
 
-static depot_stack_handle_t
+static intel_wakeref_t
 track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
 	return -1;
 }
 
 static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
-					     intel_wakeref_t wref)
+					     intel_wakeref_t wakeref)
 {
 }
 
@@ -349,9 +163,7 @@ intel_runtime_pm_release(struct intel_runtime_pm *rpm, int wakelock)
 static intel_wakeref_t __intel_runtime_pm_get(struct intel_runtime_pm *rpm,
 					      bool wakelock)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	int ret;
 
 	ret = pm_runtime_get_sync(rpm->kdev);
@@ -556,9 +368,7 @@ void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref)
  */
 void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	struct device *kdev = rpm->kdev;
 
 	/*
@@ -611,9 +421,7 @@ void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
 
 void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	struct device *kdev = rpm->kdev;
 
 	/* Transfer rpm ownership back to core */
@@ -628,9 +436,7 @@ void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)
 
 void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
 {
-	struct drm_i915_private *i915 = container_of(rpm,
-						     struct drm_i915_private,
-						     runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	int count = atomic_read(&rpm->wakeref_count);
 
 	intel_wakeref_auto_fini(&rpm->userfault_wakeref);
@@ -639,14 +445,17 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
 		 "i915 raw-wakerefs=%d wakelocks=%d on cleanup\n",
 		 intel_rpm_raw_wakeref_count(count),
 		 intel_rpm_wakelock_count(count));
+}
 
+void intel_runtime_pm_driver_last_release(struct intel_runtime_pm *rpm)
+{
+	intel_runtime_pm_driver_release(rpm);
 	untrack_all_intel_runtime_pm_wakerefs(rpm);
 }
 
 void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm)
 {
-	struct drm_i915_private *i915 =
-			container_of(rpm, struct drm_i915_private, runtime_pm);
+	struct drm_i915_private *i915 = rpm_to_i915(rpm);
 	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
 	struct device *kdev = &pdev->dev;
 
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h
index be43614c73fd..de3579d399e1 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.h
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.h
@@ -75,15 +75,7 @@ struct intel_runtime_pm {
 	 * paired rpm_put) we can remove corresponding pairs of and keep
 	 * the array trimmed to active wakerefs.
 	 */
-	struct intel_runtime_pm_debug {
-		spinlock_t lock;
-
-		depot_stack_handle_t last_acquire;
-		depot_stack_handle_t last_release;
-
-		depot_stack_handle_t *owners;
-		unsigned long count;
-	} debug;
+	struct ref_tracker_dir debug;
 #endif
 };
 
@@ -187,6 +179,7 @@ void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm);
 void intel_runtime_pm_enable(struct intel_runtime_pm *rpm);
 void intel_runtime_pm_disable(struct intel_runtime_pm *rpm);
 void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm);
+void intel_runtime_pm_driver_last_release(struct intel_runtime_pm *rpm);
 
 intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm);
 intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm);
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index 623a69089386..dea2f63184f8 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -99,7 +99,8 @@ static void __intel_wakeref_put_work(struct work_struct *wrk)
 void __intel_wakeref_init(struct intel_wakeref *wf,
 			  struct drm_i915_private *i915,
 			  const struct intel_wakeref_ops *ops,
-			  struct intel_wakeref_lockclass *key)
+			  struct intel_wakeref_lockclass *key,
+			  const char *name)
 {
 	wf->i915 = i915;
 	wf->ops = ops;
@@ -111,6 +112,10 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
 	INIT_DELAYED_WORK(&wf->work, __intel_wakeref_put_work);
 	lockdep_init_map(&wf->work.work.lockdep_map,
 			 "wakeref.work", &key->work, 0);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
+	ref_tracker_dir_init(&wf->debug, INTEL_REFTRACK_DEAD_COUNT, name);
+#endif
 }
 
 int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
@@ -191,3 +196,31 @@ void intel_wakeref_auto_fini(struct intel_wakeref_auto *wf)
 	intel_wakeref_auto(wf, 0);
 	INTEL_WAKEREF_BUG_ON(wf->wakeref);
 }
+
+void intel_ref_tracker_show(struct ref_tracker_dir *dir,
+			    struct drm_printer *p)
+{
+	const size_t buf_size = PAGE_SIZE;
+	char *buf, *sb, *se;
+	size_t count;
+
+	buf = kmalloc(buf_size, GFP_NOWAIT);
+	if (!buf)
+		return;
+
+	count = ref_tracker_dir_snprint(dir, buf, buf_size);
+	if (!count)
+		goto free;
+	/* printk does not like big buffers, so we split it */
+	for (sb = buf; *sb; sb = se + 1) {
+		se = strchrnul(sb, '\n');
+		drm_printf(p, "%.*s", (int)(se - sb + 1), sb);
+		if (!*se)
+			break;
+	}
+	if (count >= buf_size)
+		drm_printf(p, "\n...dropped %zd extra bytes of leak report.\n",
+			   count + 1 - buf_size);
+free:
+	kfree(buf);
+}
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index ec881b097368..68aa3be48251 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -7,16 +7,25 @@
 #ifndef INTEL_WAKEREF_H
 #define INTEL_WAKEREF_H
 
+#include <drm/drm_print.h>
+
 #include <linux/atomic.h>
 #include <linux/bitfield.h>
 #include <linux/bits.h>
 #include <linux/lockdep.h>
 #include <linux/mutex.h>
 #include <linux/refcount.h>
+#include <linux/ref_tracker.h>
+#include <linux/slab.h>
 #include <linux/stackdepot.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 
+typedef unsigned long intel_wakeref_t;
+
+#define INTEL_REFTRACK_DEAD_COUNT 16
+#define INTEL_REFTRACK_PRINT_LIMIT 16
+
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
 #define INTEL_WAKEREF_BUG_ON(expr) BUG_ON(expr)
 #else
@@ -26,8 +35,6 @@
 struct intel_runtime_pm;
 struct intel_wakeref;
 
-typedef depot_stack_handle_t intel_wakeref_t;
-
 struct intel_wakeref_ops {
 	int (*get)(struct intel_wakeref *wf);
 	int (*put)(struct intel_wakeref *wf);
@@ -43,6 +50,10 @@ struct intel_wakeref {
 	const struct intel_wakeref_ops *ops;
 
 	struct delayed_work work;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
+	struct ref_tracker_dir debug;
+#endif
 };
 
 struct intel_wakeref_lockclass {
@@ -53,11 +64,12 @@ struct intel_wakeref_lockclass {
 void __intel_wakeref_init(struct intel_wakeref *wf,
 			  struct drm_i915_private *i915,
 			  const struct intel_wakeref_ops *ops,
-			  struct intel_wakeref_lockclass *key);
-#define intel_wakeref_init(wf, i915, ops) do {				\
+			  struct intel_wakeref_lockclass *key,
+			  const char *name);
+#define intel_wakeref_init(wf, i915, ops, name) do {			\
 	static struct intel_wakeref_lockclass __key;			\
 									\
-	__intel_wakeref_init((wf), (i915), (ops), &__key);		\
+	__intel_wakeref_init((wf), (i915), (ops), &__key, name);	\
 } while (0)
 
 int __intel_wakeref_get_first(struct intel_wakeref *wf);
@@ -261,6 +273,57 @@ __intel_wakeref_defer_park(struct intel_wakeref *wf)
  */
 int intel_wakeref_wait_for_idle(struct intel_wakeref *wf);
 
+#define INTEL_WAKEREF_DEF ((intel_wakeref_t)(-1))
+
+static inline intel_wakeref_t intel_ref_tracker_alloc(struct ref_tracker_dir *dir)
+{
+	struct ref_tracker *user = NULL;
+
+	ref_tracker_alloc(dir, &user, GFP_NOWAIT);
+
+	return (intel_wakeref_t)user ?: INTEL_WAKEREF_DEF;
+}
+
+static inline void intel_ref_tracker_free(struct ref_tracker_dir *dir,
+					  intel_wakeref_t handle)
+{
+	struct ref_tracker *user;
+
+	user = (handle == INTEL_WAKEREF_DEF) ? NULL : (void *)handle;
+
+	ref_tracker_free(dir, &user);
+}
+
+void intel_ref_tracker_show(struct ref_tracker_dir *dir,
+			    struct drm_printer *p);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
+
+static inline intel_wakeref_t intel_wakeref_track(struct intel_wakeref *wf)
+{
+	return intel_ref_tracker_alloc(&wf->debug);
+}
+
+static inline void intel_wakeref_untrack(struct intel_wakeref *wf,
+					 intel_wakeref_t handle)
+{
+	intel_ref_tracker_free(&wf->debug, handle);
+}
+
+#else
+
+static inline intel_wakeref_t intel_wakeref_track(struct intel_wakeref *wf)
+{
+	return -1;
+}
+
+static inline void intel_wakeref_untrack(struct intel_wakeref *wf,
+					 intel_wakeref_t handle)
+{
+}
+
+#endif
+
 struct intel_wakeref_auto {
 	struct drm_i915_private *i915;
 	struct timer_list timer;
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c
index dc327cf40b5a..75278e78ca90 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c
@@ -199,6 +199,9 @@ int intel_pxp_init(struct drm_i915_private *i915)
 	struct intel_gt *gt;
 	bool is_full_feature = false;
 
+	if (intel_gt_is_wedged(to_gt(i915)))
+		return -ENOTCONN;
+
 	/*
 	 * NOTE: Get the ctrl_gt before checking intel_pxp_is_supported since
 	 * we still need it if PXP's backend tee transport is needed.
@@ -303,6 +306,8 @@ static int __pxp_global_teardown_final(struct intel_pxp *pxp)
 
 	if (!pxp->arb_is_valid)
 		return 0;
+
+	drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for suspend/fini");
 	/*
 	 * To ensure synchronous and coherent session teardown completion
 	 * in response to suspend or shutdown triggers, don't use a worker.
@@ -324,6 +329,8 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp)
 
 	if (pxp->arb_is_valid)
 		return 0;
+
+	drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for restart");
 	/*
 	 * The arb-session is currently inactive and we are doing a reset and restart
 	 * due to a runtime event. Use the worker that was designed for this.
@@ -332,8 +339,11 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp)
 
 	timeout = intel_pxp_get_backend_timeout_ms(pxp);
 
-	if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout)))
+	if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout))) {
+		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: restart backend timed out (%d ms)",
+			timeout);
 		return -ETIMEDOUT;
+	}
 
 	return 0;
 }
@@ -414,10 +424,12 @@ int intel_pxp_start(struct intel_pxp *pxp)
 	int ret = 0;
 
 	ret = intel_pxp_get_readiness_status(pxp, PXP_READINESS_TIMEOUT);
-	if (ret < 0)
+	if (ret < 0) {
+		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: tried but not-avail (%d)", ret);
 		return ret;
-	else if (ret > 1)
+	} else if (ret > 1) {
 		return -EIO; /* per UAPI spec, user may retry later */
+	}
 
 	mutex_lock(&pxp->arb_mutex);
 
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
index 91e9622c07d0..d81750b9bdda 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
@@ -40,11 +40,12 @@ void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir)
 		   GEN12_DISPLAY_APP_TERMINATED_PER_FW_REQ_INTERRUPT)) {
 		/* immediately mark PXP as inactive on termination */
 		intel_pxp_mark_termination_in_progress(pxp);
-		pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED;
+		pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED |
+				       PXP_EVENT_TYPE_IRQ;
 	}
 
 	if (iir & GEN12_DISPLAY_STATE_RESET_COMPLETE_INTERRUPT)
-		pxp->session_events |= PXP_TERMINATION_COMPLETE;
+		pxp->session_events |= PXP_TERMINATION_COMPLETE | PXP_EVENT_TYPE_IRQ;
 
 	if (pxp->session_events)
 		queue_work(system_unbound_wq, &pxp->session_work);
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
index 0a3e66b0265e..091c86e03d1a 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
@@ -137,8 +137,10 @@ void intel_pxp_terminate(struct intel_pxp *pxp, bool post_invalidation_needs_res
 static void pxp_terminate_complete(struct intel_pxp *pxp)
 {
 	/* Re-create the arb session after teardown handle complete */
-	if (fetch_and_zero(&pxp->hw_state_invalidated))
+	if (fetch_and_zero(&pxp->hw_state_invalidated)) {
+		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: creating arb_session after invalidation");
 		pxp_create_arb_session(pxp);
+	}
 
 	complete_all(&pxp->termination);
 }
@@ -157,6 +159,8 @@ static void pxp_session_work(struct work_struct *work)
 	if (!events)
 		return;
 
+	drm_dbg(&gt->i915->drm, "PXP: processing event-flags 0x%08x", events);
+
 	if (events & PXP_INVAL_REQUIRED)
 		intel_pxp_invalidate(pxp);
 
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
index 7e11fa8034b2..07864b584cf4 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
@@ -124,6 +124,7 @@ struct intel_pxp {
 #define PXP_TERMINATION_REQUEST  BIT(0)
 #define PXP_TERMINATION_COMPLETE BIT(1)
 #define PXP_INVAL_REQUIRED       BIT(2)
+#define PXP_EVENT_TYPE_IRQ       BIT(3)
 };
 
 #endif /* __INTEL_PXP_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/i915_syncmap.c b/drivers/gpu/drm/i915/selftests/i915_syncmap.c
index 47f4ae18a1ef..88fa845e9f4a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_syncmap.c
+++ b/drivers/gpu/drm/i915/selftests/i915_syncmap.c
@@ -77,7 +77,7 @@ __sync_print(struct i915_syncmap *p,
 		for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) {
 			buf = __sync_print(__sync_child(p)[i], buf, sz,
 					   depth + 1,
-					   last << 1 | !!(p->bitmap >> (i + 1)),
+					   last << 1 | ((p->bitmap >> (i + 1)) ? 1 : 0),
 					   i);
 		}
 	}
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index 4ddc6d902752..7d41874a49c5 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -37,8 +37,9 @@ int igt_live_test_begin(struct igt_live_test *t,
 		}
 
 		for_each_engine(engine, gt, id)
-			t->reset_engine[id] =
-			i915_reset_engine_count(&i915->gpu_error, engine);
+			t->reset_engine[i][id] =
+				i915_reset_engine_count(&i915->gpu_error,
+							engine);
 	}
 
 	t->reset_global = i915_reset_count(&i915->gpu_error);
@@ -66,14 +67,14 @@ int igt_live_test_end(struct igt_live_test *t)
 
 	for_each_gt(gt, i915, i) {
 		for_each_engine(engine, gt, id) {
-			if (t->reset_engine[id] ==
+			if (t->reset_engine[i][id] ==
 			    i915_reset_engine_count(&i915->gpu_error, engine))
 				continue;
 
 			gt_err(gt, "%s(%s): engine '%s' was reset %d times!\n",
 			       t->func, t->name, engine->name,
 			       i915_reset_engine_count(&i915->gpu_error, engine) -
-			       t->reset_engine[id]);
+			       t->reset_engine[i][id]);
 			return -EIO;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h
index 36ed42736c52..83e3ad430922 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.h
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h
@@ -7,6 +7,7 @@
 #ifndef IGT_LIVE_TEST_H
 #define IGT_LIVE_TEST_H
 
+#include "gt/intel_gt_defines.h" /* for I915_MAX_GT */
 #include "gt/intel_engine.h" /* for I915_NUM_ENGINES */
 
 struct drm_i915_private;
@@ -17,7 +18,7 @@ struct igt_live_test {
 	const char *name;
 
 	unsigned int reset_global;
-	unsigned int reset_engine[I915_NUM_ENGINES];
+	unsigned int reset_engine[I915_MAX_GT][I915_NUM_ENGINES];
 };
 
 /*
diff --git a/drivers/gpu/drm/imagination/pvr_job.c b/drivers/gpu/drm/imagination/pvr_job.c
index 04139da6c04d..78c2f3c6dce0 100644
--- a/drivers/gpu/drm/imagination/pvr_job.c
+++ b/drivers/gpu/drm/imagination/pvr_job.c
@@ -746,7 +746,7 @@ pvr_submit_jobs(struct pvr_device *pvr_dev, struct pvr_file *pvr_file,
 	if (err)
 		goto out_job_data_cleanup;
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0);
 
 	xa_init_flags(&signal_array, XA_FLAGS_ALLOC);
 
diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile
index d4d193f60271..5e4436403b8d 100644
--- a/drivers/gpu/drm/mediatek/Makefile
+++ b/drivers/gpu/drm/mediatek/Makefile
@@ -16,7 +16,8 @@ mediatek-drm-y := mtk_disp_aal.o \
 		  mtk_dsi.o \
 		  mtk_dpi.o \
 		  mtk_ethdr.o \
-		  mtk_mdp_rdma.o
+		  mtk_mdp_rdma.o \
+		  mtk_padding.o
 
 obj-$(CONFIG_DRM_MEDIATEK) += mediatek-drm.o
 
diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c
index f47f417d8ba6..8519e9bade36 100644
--- a/drivers/gpu/drm/mediatek/mtk_cec.c
+++ b/drivers/gpu/drm/mediatek/mtk_cec.c
@@ -185,7 +185,6 @@ static int mtk_cec_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct mtk_cec *cec;
-	struct resource *res;
 	int ret;
 
 	cec = devm_kzalloc(dev, sizeof(*cec), GFP_KERNEL);
@@ -195,8 +194,7 @@ static int mtk_cec_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, cec);
 	spin_lock_init(&cec->lock);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	cec->regs = devm_ioremap_resource(dev, res);
+	cec->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(cec->regs)) {
 		ret = PTR_ERR(cec->regs);
 		dev_err(dev, "Failed to ioremap cec: %d\n", ret);
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_aal.c b/drivers/gpu/drm/mediatek/mtk_disp_aal.c
index 2209159d8855..40fe403086c3 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_aal.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_aal.c
@@ -168,7 +168,6 @@ static int mtk_disp_aal_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct mtk_disp_aal *priv;
-	struct resource *res;
 	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -181,8 +180,7 @@ static int mtk_disp_aal_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->clk);
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->regs = devm_ioremap_resource(dev, res);
+	priv->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->regs)) {
 		dev_err(dev, "failed to ioremap aal\n");
 		return PTR_ERR(priv->regs);
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c b/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c
index 4234ff7485e8..465cddce0d32 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c
@@ -153,7 +153,6 @@ static int mtk_disp_ccorr_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct mtk_disp_ccorr *priv;
-	struct resource *res;
 	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -166,8 +165,7 @@ static int mtk_disp_ccorr_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->clk);
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->regs = devm_ioremap_resource(dev, res);
+	priv->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->regs)) {
 		dev_err(dev, "failed to ioremap ccorr\n");
 		return PTR_ERR(priv->regs);
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
index 1311562d25cc..74fa56339383 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h
+++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
@@ -110,6 +110,8 @@ void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev,
 			     unsigned int next);
 void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev,
 				unsigned int next);
+int mtk_ovl_adaptor_power_on(struct device *dev);
+void mtk_ovl_adaptor_power_off(struct device *dev);
 int mtk_ovl_adaptor_clk_enable(struct device *dev);
 void mtk_ovl_adaptor_clk_disable(struct device *dev);
 void mtk_ovl_adaptor_config(struct device *dev, unsigned int w,
@@ -151,6 +153,8 @@ void mtk_rdma_disable_vblank(struct device *dev);
 const u32 *mtk_rdma_get_formats(struct device *dev);
 size_t mtk_rdma_get_num_formats(struct device *dev);
 
+int mtk_mdp_rdma_power_on(struct device *dev);
+void mtk_mdp_rdma_power_off(struct device *dev);
 int mtk_mdp_rdma_clk_enable(struct device *dev);
 void mtk_mdp_rdma_clk_disable(struct device *dev);
 void mtk_mdp_rdma_start(struct device *dev, struct cmdq_pkt *cmdq_pkt);
@@ -160,4 +164,8 @@ void mtk_mdp_rdma_config(struct device *dev, struct mtk_mdp_rdma_cfg *cfg,
 const u32 *mtk_mdp_rdma_get_formats(struct device *dev);
 size_t mtk_mdp_rdma_get_num_formats(struct device *dev);
 
+int mtk_padding_clk_enable(struct device *dev);
+void mtk_padding_clk_disable(struct device *dev);
+void mtk_padding_start(struct device *dev);
+void mtk_padding_stop(struct device *dev);
 #endif
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_merge.c b/drivers/gpu/drm/mediatek/mtk_disp_merge.c
index e525a6b9e5b0..22f768d923d5 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_merge.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_merge.c
@@ -103,7 +103,7 @@ void mtk_merge_stop_cmdq(struct device *dev, struct cmdq_pkt *cmdq_pkt)
 	mtk_ddp_write(cmdq_pkt, 0, &priv->cmdq_reg, priv->regs,
 		      DISP_REG_MERGE_CTRL);
 
-	if (priv->async_clk)
+	if (!cmdq_pkt && priv->async_clk)
 		reset_control_reset(priv->reset_ctl);
 }
 
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
index 3fdef3ad4ffd..12a37f740bf4 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c
@@ -27,13 +27,14 @@
 #define MTK_OVL_ADAPTOR_LAYER_NUM 4
 
 enum mtk_ovl_adaptor_comp_type {
-	OVL_ADAPTOR_TYPE_RDMA = 0,
-	OVL_ADAPTOR_TYPE_MERGE,
 	OVL_ADAPTOR_TYPE_ETHDR,
+	OVL_ADAPTOR_TYPE_MDP_RDMA,
+	OVL_ADAPTOR_TYPE_MERGE,
 	OVL_ADAPTOR_TYPE_NUM,
 };
 
 enum mtk_ovl_adaptor_comp_id {
+	OVL_ADAPTOR_ETHDR0,
 	OVL_ADAPTOR_MDP_RDMA0,
 	OVL_ADAPTOR_MDP_RDMA1,
 	OVL_ADAPTOR_MDP_RDMA2,
@@ -46,13 +47,14 @@ enum mtk_ovl_adaptor_comp_id {
 	OVL_ADAPTOR_MERGE1,
 	OVL_ADAPTOR_MERGE2,
 	OVL_ADAPTOR_MERGE3,
-	OVL_ADAPTOR_ETHDR0,
 	OVL_ADAPTOR_ID_MAX
 };
 
 struct ovl_adaptor_comp_match {
 	enum mtk_ovl_adaptor_comp_type type;
+	enum mtk_ddp_comp_id comp_id;
 	int alias_id;
+	const struct mtk_ddp_comp_funcs *funcs;
 };
 
 struct mtk_disp_ovl_adaptor {
@@ -62,25 +64,44 @@ struct mtk_disp_ovl_adaptor {
 };
 
 static const char * const private_comp_stem[OVL_ADAPTOR_TYPE_NUM] = {
-	[OVL_ADAPTOR_TYPE_RDMA]		= "vdo1-rdma",
-	[OVL_ADAPTOR_TYPE_MERGE]	= "merge",
 	[OVL_ADAPTOR_TYPE_ETHDR]	= "ethdr",
+	[OVL_ADAPTOR_TYPE_MDP_RDMA]	= "vdo1-rdma",
+	[OVL_ADAPTOR_TYPE_MERGE]	= "merge",
+};
+
+static const struct mtk_ddp_comp_funcs ethdr = {
+	.clk_enable = mtk_ethdr_clk_enable,
+	.clk_disable = mtk_ethdr_clk_disable,
+	.start = mtk_ethdr_start,
+	.stop = mtk_ethdr_stop,
+};
+
+static const struct mtk_ddp_comp_funcs merge = {
+	.clk_enable = mtk_merge_clk_enable,
+	.clk_disable = mtk_merge_clk_disable,
+};
+
+static const struct mtk_ddp_comp_funcs rdma = {
+	.power_on = mtk_mdp_rdma_power_on,
+	.power_off = mtk_mdp_rdma_power_off,
+	.clk_enable = mtk_mdp_rdma_clk_enable,
+	.clk_disable = mtk_mdp_rdma_clk_disable,
 };
 
 static const struct ovl_adaptor_comp_match comp_matches[OVL_ADAPTOR_ID_MAX] = {
-	[OVL_ADAPTOR_MDP_RDMA0]	= { OVL_ADAPTOR_TYPE_RDMA, 0 },
-	[OVL_ADAPTOR_MDP_RDMA1]	= { OVL_ADAPTOR_TYPE_RDMA, 1 },
-	[OVL_ADAPTOR_MDP_RDMA2]	= { OVL_ADAPTOR_TYPE_RDMA, 2 },
-	[OVL_ADAPTOR_MDP_RDMA3]	= { OVL_ADAPTOR_TYPE_RDMA, 3 },
-	[OVL_ADAPTOR_MDP_RDMA4]	= { OVL_ADAPTOR_TYPE_RDMA, 4 },
-	[OVL_ADAPTOR_MDP_RDMA5]	= { OVL_ADAPTOR_TYPE_RDMA, 5 },
-	[OVL_ADAPTOR_MDP_RDMA6]	= { OVL_ADAPTOR_TYPE_RDMA, 6 },
-	[OVL_ADAPTOR_MDP_RDMA7]	= { OVL_ADAPTOR_TYPE_RDMA, 7 },
-	[OVL_ADAPTOR_MERGE0]	= { OVL_ADAPTOR_TYPE_MERGE, 1 },
-	[OVL_ADAPTOR_MERGE1]	= { OVL_ADAPTOR_TYPE_MERGE, 2 },
-	[OVL_ADAPTOR_MERGE2]	= { OVL_ADAPTOR_TYPE_MERGE, 3 },
-	[OVL_ADAPTOR_MERGE3]	= { OVL_ADAPTOR_TYPE_MERGE, 4 },
-	[OVL_ADAPTOR_ETHDR0]	= { OVL_ADAPTOR_TYPE_ETHDR, 0 },
+	[OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, DDP_COMPONENT_ETHDR_MIXER, 0, &ethdr },
+	[OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA0, 0, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA1, 1, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA2, 2, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA3] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA3, 3, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA4] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA4, 4, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA5] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA5, 5, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA6] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA6, 6, &rdma },
+	[OVL_ADAPTOR_MDP_RDMA7] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA7, 7, &rdma },
+	[OVL_ADAPTOR_MERGE0] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE1, 1, &merge },
+	[OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE2, 2, &merge },
+	[OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE3, 3, &merge },
+	[OVL_ADAPTOR_MERGE3] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE4, 4, &merge },
 };
 
 void mtk_ovl_adaptor_layer_config(struct device *dev, unsigned int idx,
@@ -172,68 +193,112 @@ void mtk_ovl_adaptor_config(struct device *dev, unsigned int w,
 
 void mtk_ovl_adaptor_start(struct device *dev)
 {
+	int i;
 	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
 
-	mtk_ethdr_start(ovl_adaptor->ovl_adaptor_comp[OVL_ADAPTOR_ETHDR0]);
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i] ||
+		    !comp_matches[i].funcs->start)
+			continue;
+
+		comp_matches[i].funcs->start(ovl_adaptor->ovl_adaptor_comp[i]);
+	}
 }
 
 void mtk_ovl_adaptor_stop(struct device *dev)
 {
+	int i;
 	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
 
-	mtk_ethdr_stop(ovl_adaptor->ovl_adaptor_comp[OVL_ADAPTOR_ETHDR0]);
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i] ||
+		    !comp_matches[i].funcs->stop)
+			continue;
+
+		comp_matches[i].funcs->stop(ovl_adaptor->ovl_adaptor_comp[i]);
+	}
 }
 
-int mtk_ovl_adaptor_clk_enable(struct device *dev)
+/**
+ * power_off - Power off the devices in OVL adaptor
+ * @dev: Device to be powered off
+ * @num: Number of the devices to be powered off
+ *
+ * Calls the .power_off() ovl_adaptor component callback if it is present.
+ */
+static inline void power_off(struct device *dev, int num)
 {
 	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
-	struct device *comp;
-	int ret;
 	int i;
 
-	for (i = 0; i < OVL_ADAPTOR_MERGE0; i++) {
-		comp = ovl_adaptor->ovl_adaptor_comp[i];
-		ret = pm_runtime_get_sync(comp);
+	if (num > OVL_ADAPTOR_ID_MAX)
+		num = OVL_ADAPTOR_ID_MAX;
+
+	for (i = num - 1; i >= 0; i--) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i] ||
+		    !comp_matches[i].funcs->power_off)
+			continue;
+
+		comp_matches[i].funcs->power_off(ovl_adaptor->ovl_adaptor_comp[i]);
+	}
+}
+
+/**
+ * mtk_ovl_adaptor_power_on - Power on the devices in OVL adaptor
+ * @dev: Device to be powered on
+ *
+ * Different from OVL, OVL adaptor is a pseudo device so
+ * we didn't define it in the device tree, pm_runtime_resume_and_get()
+ * called by .atomic_enable() power on no device in OVL adaptor,
+ * we have to implement a function to do the job instead.
+ *
+ * Return: Zero for success or negative number for failure.
+ */
+int mtk_ovl_adaptor_power_on(struct device *dev)
+{
+	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
+	int i, ret;
+
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i] ||
+		    !comp_matches[i].funcs->power_on)
+			continue;
+
+		ret = comp_matches[i].funcs->power_on(ovl_adaptor->ovl_adaptor_comp[i]);
 		if (ret < 0) {
 			dev_err(dev, "Failed to enable power domain %d, err %d\n", i, ret);
-			goto pwr_err;
+			power_off(dev, i);
+			return ret;
 		}
 	}
+	return 0;
+}
+
+void mtk_ovl_adaptor_power_off(struct device *dev)
+{
+	power_off(dev, OVL_ADAPTOR_ID_MAX);
+}
+
+int mtk_ovl_adaptor_clk_enable(struct device *dev)
+{
+	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
+	struct device *comp;
+	int ret;
+	int i;
 
 	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
 		comp = ovl_adaptor->ovl_adaptor_comp[i];
-
-		if (i < OVL_ADAPTOR_MERGE0)
-			ret = mtk_mdp_rdma_clk_enable(comp);
-		else if (i < OVL_ADAPTOR_ETHDR0)
-			ret = mtk_merge_clk_enable(comp);
-		else
-			ret = mtk_ethdr_clk_enable(comp);
+		if (!comp || !comp_matches[i].funcs->clk_enable)
+			continue;
+		ret = comp_matches[i].funcs->clk_enable(comp);
 		if (ret) {
 			dev_err(dev, "Failed to enable clock %d, err %d\n", i, ret);
-			goto clk_err;
+			while (--i >= 0)
+				comp_matches[i].funcs->clk_disable(comp);
+			return ret;
 		}
 	}
-
-	return ret;
-
-clk_err:
-	while (--i >= 0) {
-		comp = ovl_adaptor->ovl_adaptor_comp[i];
-		if (i < OVL_ADAPTOR_MERGE0)
-			mtk_mdp_rdma_clk_disable(comp);
-		else if (i < OVL_ADAPTOR_ETHDR0)
-			mtk_merge_clk_disable(comp);
-		else
-			mtk_ethdr_clk_disable(comp);
-	}
-	i = OVL_ADAPTOR_MERGE0;
-
-pwr_err:
-	while (--i >= 0)
-		pm_runtime_put(ovl_adaptor->ovl_adaptor_comp[i]);
-
-	return ret;
+	return 0;
 }
 
 void mtk_ovl_adaptor_clk_disable(struct device *dev)
@@ -244,15 +309,11 @@ void mtk_ovl_adaptor_clk_disable(struct device *dev)
 
 	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
 		comp = ovl_adaptor->ovl_adaptor_comp[i];
-
-		if (i < OVL_ADAPTOR_MERGE0) {
-			mtk_mdp_rdma_clk_disable(comp);
+		if (!comp || !comp_matches[i].funcs->clk_disable)
+			continue;
+		comp_matches[i].funcs->clk_disable(comp);
+		if (i < OVL_ADAPTOR_MERGE0)
 			pm_runtime_put(comp);
-		} else if (i < OVL_ADAPTOR_ETHDR0) {
-			mtk_merge_clk_disable(comp);
-		} else {
-			mtk_ethdr_clk_disable(comp);
-		}
 	}
 }
 
@@ -314,40 +375,31 @@ size_t mtk_ovl_adaptor_get_num_formats(struct device *dev)
 
 void mtk_ovl_adaptor_add_comp(struct device *dev, struct mtk_mutex *mutex)
 {
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA0);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA1);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA2);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA3);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA4);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA5);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA6);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA7);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE1);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE2);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE3);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE4);
-	mtk_mutex_add_comp(mutex, DDP_COMPONENT_ETHDR_MIXER);
+	int i;
+	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
+
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i])
+			continue;
+		mtk_mutex_add_comp(mutex, comp_matches[i].comp_id);
+	}
 }
 
 void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex)
 {
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA0);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA1);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA2);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA3);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA4);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA5);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA6);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA7);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE1);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE2);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE3);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE4);
-	mtk_mutex_remove_comp(mutex, DDP_COMPONENT_ETHDR_MIXER);
+	int i;
+	struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev);
+
+	for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) {
+		if (!ovl_adaptor->ovl_adaptor_comp[i])
+			continue;
+		mtk_mutex_remove_comp(mutex, comp_matches[i].comp_id);
+	}
 }
 
 void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsigned int next)
 {
+	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA0, DDP_COMPONENT_MERGE1);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA1, DDP_COMPONENT_MERGE1);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA2, DDP_COMPONENT_MERGE2);
@@ -355,11 +407,11 @@ void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsig
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE2, DDP_COMPONENT_ETHDR_MIXER);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE3, DDP_COMPONENT_ETHDR_MIXER);
 	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE4, DDP_COMPONENT_ETHDR_MIXER);
-	mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next);
 }
 
 void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev, unsigned int next)
 {
+	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA0, DDP_COMPONENT_MERGE1);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA1, DDP_COMPONENT_MERGE1);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA2, DDP_COMPONENT_MERGE2);
@@ -367,7 +419,6 @@ void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev, un
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE2, DDP_COMPONENT_ETHDR_MIXER);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE3, DDP_COMPONENT_ETHDR_MIXER);
 	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE4, DDP_COMPONENT_ETHDR_MIXER);
-	mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next);
 }
 
 static int ovl_adaptor_comp_get_id(struct device *dev, struct device_node *node,
@@ -386,17 +437,10 @@ static int ovl_adaptor_comp_get_id(struct device *dev, struct device_node *node,
 }
 
 static const struct of_device_id mtk_ovl_adaptor_comp_dt_ids[] = {
-	{
-		.compatible = "mediatek,mt8195-vdo1-rdma",
-		.data = (void *)OVL_ADAPTOR_TYPE_RDMA,
-	}, {
-		.compatible = "mediatek,mt8195-disp-merge",
-		.data = (void *)OVL_ADAPTOR_TYPE_MERGE,
-	}, {
-		.compatible = "mediatek,mt8195-disp-ethdr",
-		.data = (void *)OVL_ADAPTOR_TYPE_ETHDR,
-	},
-	{},
+	{ .compatible = "mediatek,mt8195-disp-ethdr", .data = (void *)OVL_ADAPTOR_TYPE_ETHDR },
+	{ .compatible = "mediatek,mt8195-disp-merge", .data = (void *)OVL_ADAPTOR_TYPE_MERGE },
+	{ .compatible = "mediatek,mt8195-vdo1-rdma", .data = (void *)OVL_ADAPTOR_TYPE_MDP_RDMA },
+	{ /* sentinel */ }
 };
 
 static int compare_of(struct device *dev, void *data)
diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c
index e4c16ba9902d..2136a596efa1 100644
--- a/drivers/gpu/drm/mediatek/mtk_dp.c
+++ b/drivers/gpu/drm/mediatek/mtk_dp.c
@@ -2818,3 +2818,4 @@ MODULE_AUTHOR("Markus Schneider-Pargmann <msp@baylibre.com>");
 MODULE_AUTHOR("Bo-Chen Chen <rex-bc.chen@mediatek.com>");
 MODULE_DESCRIPTION("MediaTek DisplayPort Driver");
 MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: phy_mtk_dp");
diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c
index 4e3d9f7b4d8c..beb7d9d08e97 100644
--- a/drivers/gpu/drm/mediatek/mtk_dpi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
@@ -966,20 +966,6 @@ static const struct mtk_dpi_conf mt8186_conf = {
 	.csc_enable_bit = CSC_ENABLE,
 };
 
-static const struct mtk_dpi_conf mt8188_dpintf_conf = {
-	.cal_factor = mt8195_dpintf_calculate_factor,
-	.max_clock_khz = 600000,
-	.output_fmts = mt8195_output_fmts,
-	.num_output_fmts = ARRAY_SIZE(mt8195_output_fmts),
-	.pixels_per_iter = 4,
-	.input_2pixel = false,
-	.dimension_mask = DPINTF_HPW_MASK,
-	.hvsize_mask = DPINTF_HSIZE_MASK,
-	.channel_swap_shift = DPINTF_CH_SWAP,
-	.yuv422_en_bit = DPINTF_YUV422_EN,
-	.csc_enable_bit = DPINTF_CSC_ENABLE,
-};
-
 static const struct mtk_dpi_conf mt8192_conf = {
 	.cal_factor = mt8183_calculate_factor,
 	.reg_h_fre_con = 0xe0,
@@ -1103,7 +1089,7 @@ static const struct of_device_id mtk_dpi_of_ids[] = {
 	{ .compatible = "mediatek,mt8173-dpi", .data = &mt8173_conf },
 	{ .compatible = "mediatek,mt8183-dpi", .data = &mt8183_conf },
 	{ .compatible = "mediatek,mt8186-dpi", .data = &mt8186_conf },
-	{ .compatible = "mediatek,mt8188-dp-intf", .data = &mt8188_dpintf_conf },
+	{ .compatible = "mediatek,mt8188-dp-intf", .data = &mt8195_dpintf_conf },
 	{ .compatible = "mediatek,mt8192-dpi", .data = &mt8192_conf },
 	{ .compatible = "mediatek,mt8195-dp-intf", .data = &mt8195_dpintf_conf },
 	{ /* sentinel */ },
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index c277b9fae950..4d5ff39dc2ef 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -721,7 +721,7 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc,
 
 	DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id);
 
-	ret = pm_runtime_resume_and_get(comp->dev);
+	ret = mtk_ddp_comp_power_on(comp);
 	if (ret < 0) {
 		DRM_DEV_ERROR(comp->dev, "Failed to enable power domain: %d\n", ret);
 		return;
@@ -731,7 +731,7 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc,
 
 	ret = mtk_crtc_ddp_hw_init(mtk_crtc);
 	if (ret) {
-		pm_runtime_put(comp->dev);
+		mtk_ddp_comp_power_off(comp);
 		return;
 	}
 
@@ -744,7 +744,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc,
 {
 	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
 	struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0];
-	int i, ret;
+	int i;
 
 	DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id);
 	if (!mtk_crtc->enabled)
@@ -774,9 +774,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc,
 
 	drm_crtc_vblank_off(crtc);
 	mtk_crtc_ddp_hw_fini(mtk_crtc);
-	ret = pm_runtime_put(comp->dev);
-	if (ret < 0)
-		DRM_DEV_ERROR(comp->dev, "Failed to disable power domain: %d\n", ret);
+	mtk_ddp_comp_power_off(comp);
 
 	mtk_crtc->enabled = false;
 }
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
index 3046c0409353..a9b5a21cde2d 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
@@ -398,6 +398,8 @@ static const struct mtk_ddp_comp_funcs ddp_ufoe = {
 };
 
 static const struct mtk_ddp_comp_funcs ddp_ovl_adaptor = {
+	.power_on = mtk_ovl_adaptor_power_on,
+	.power_off = mtk_ovl_adaptor_power_off,
 	.clk_enable = mtk_ovl_adaptor_clk_enable,
 	.clk_disable = mtk_ovl_adaptor_clk_disable,
 	.config = mtk_ovl_adaptor_config,
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
index 4bae55bdb034..15b2eafff438 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
@@ -7,6 +7,7 @@
 #define MTK_DRM_DDP_COMP_H
 
 #include <linux/io.h>
+#include <linux/pm_runtime.h>
 #include <linux/soc/mediatek/mtk-cmdq.h>
 #include <linux/soc/mediatek/mtk-mmsys.h>
 #include <linux/soc/mediatek/mtk-mutex.h>
@@ -46,6 +47,8 @@ enum mtk_ddp_comp_type {
 struct mtk_ddp_comp;
 struct cmdq_pkt;
 struct mtk_ddp_comp_funcs {
+	int (*power_on)(struct device *dev);
+	void (*power_off)(struct device *dev);
 	int (*clk_enable)(struct device *dev);
 	void (*clk_disable)(struct device *dev);
 	void (*config)(struct device *dev, unsigned int w,
@@ -92,6 +95,23 @@ struct mtk_ddp_comp {
 	const struct mtk_ddp_comp_funcs *funcs;
 };
 
+static inline int mtk_ddp_comp_power_on(struct mtk_ddp_comp *comp)
+{
+	if (comp->funcs && comp->funcs->power_on)
+		return comp->funcs->power_on(comp->dev);
+	else
+		return pm_runtime_resume_and_get(comp->dev);
+	return 0;
+}
+
+static inline void mtk_ddp_comp_power_off(struct mtk_ddp_comp *comp)
+{
+	if (comp->funcs && comp->funcs->power_off)
+		comp->funcs->power_off(comp->dev);
+	else
+		pm_runtime_put(comp->dev);
+}
+
 static inline int mtk_ddp_comp_clk_enable(struct mtk_ddp_comp *comp)
 {
 	if (comp->funcs && comp->funcs->clk_enable)
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index 2dfaa613276a..f7504d1edc62 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -5,7 +5,6 @@
  */
 
 #include <linux/component.h>
-#include <linux/iommu.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
@@ -608,9 +607,6 @@ static int mtk_drm_bind(struct device *dev)
 	struct drm_device *drm;
 	int ret, i;
 
-	if (!iommu_present(&platform_bus_type))
-		return -EPROBE_DEFER;
-
 	pdev = of_find_device_by_node(private->mutex_node);
 	if (!pdev) {
 		dev_err(dev, "Waiting for disp-mutex device %pOF\n",
@@ -1000,6 +996,7 @@ static struct platform_driver * const mtk_drm_drivers[] = {
 	&mtk_dsi_driver,
 	&mtk_ethdr_driver,
 	&mtk_mdp_rdma_driver,
+	&mtk_padding_driver,
 };
 
 static int __init mtk_drm_init(void)
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.h b/drivers/gpu/drm/mediatek/mtk_drm_drv.h
index 6f98fff4f1a4..33fadb08dc1c 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.h
@@ -77,5 +77,5 @@ extern struct platform_driver mtk_dpi_driver;
 extern struct platform_driver mtk_dsi_driver;
 extern struct platform_driver mtk_ethdr_driver;
 extern struct platform_driver mtk_mdp_rdma_driver;
-
+extern struct platform_driver mtk_padding_driver;
 #endif /* MTK_DRM_DRV_H */
diff --git a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
index c3adaeefd551..ee9ce9b6d078 100644
--- a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
+++ b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
@@ -242,12 +242,27 @@ size_t mtk_mdp_rdma_get_num_formats(struct device *dev)
 	return ARRAY_SIZE(formats);
 }
 
+int mtk_mdp_rdma_power_on(struct device *dev)
+{
+	int ret = pm_runtime_resume_and_get(dev);
+
+	if (ret < 0) {
+		dev_err(dev, "Failed to power on: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+void mtk_mdp_rdma_power_off(struct device *dev)
+{
+	pm_runtime_put(dev);
+}
+
 int mtk_mdp_rdma_clk_enable(struct device *dev)
 {
 	struct mtk_mdp_rdma *rdma = dev_get_drvdata(dev);
 
-	clk_prepare_enable(rdma->clk);
-	return 0;
+	return clk_prepare_enable(rdma->clk);
 }
 
 void mtk_mdp_rdma_clk_disable(struct device *dev)
diff --git a/drivers/gpu/drm/mediatek/mtk_padding.c b/drivers/gpu/drm/mediatek/mtk_padding.c
new file mode 100644
index 000000000000..0d6451c149b6
--- /dev/null
+++ b/drivers/gpu/drm/mediatek/mtk_padding.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 MediaTek Inc.
+ */
+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/soc/mediatek/mtk-cmdq.h>
+
+#include "mtk_disp_drv.h"
+#include "mtk_drm_crtc.h"
+#include "mtk_drm_ddp_comp.h"
+
+#define PADDING_CONTROL_REG	0x00
+#define PADDING_BYPASS			BIT(0)
+#define PADDING_ENABLE			BIT(1)
+#define PADDING_PIC_SIZE_REG	0x04
+#define PADDING_H_REG		0x08 /* horizontal */
+#define PADDING_V_REG		0x0c /* vertical */
+#define PADDING_COLOR_REG	0x10
+
+/**
+ * struct mtk_padding - Basic information of the Padding
+ * @clk: Clock of the module
+ * @reg: Virtual address of the Padding for CPU to access
+ * @cmdq_reg: CMDQ setting of the Padding
+ *
+ * Every Padding should have different clock source, register base, and
+ * CMDQ settings, we stored these differences all together.
+ */
+struct mtk_padding {
+	struct clk		*clk;
+	void __iomem		*reg;
+	struct cmdq_client_reg	cmdq_reg;
+};
+
+int mtk_padding_clk_enable(struct device *dev)
+{
+	struct mtk_padding *padding = dev_get_drvdata(dev);
+
+	return clk_prepare_enable(padding->clk);
+}
+
+void mtk_padding_clk_disable(struct device *dev)
+{
+	struct mtk_padding *padding = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(padding->clk);
+}
+
+void mtk_padding_start(struct device *dev)
+{
+	struct mtk_padding *padding = dev_get_drvdata(dev);
+
+	writel(PADDING_ENABLE | PADDING_BYPASS,
+	       padding->reg + PADDING_CONTROL_REG);
+
+	/*
+	 * Notice that even the padding is in bypass mode,
+	 * all the settings must be cleared to 0 or
+	 * undefined behaviors could happen
+	 */
+	writel(0, padding->reg + PADDING_PIC_SIZE_REG);
+	writel(0, padding->reg + PADDING_H_REG);
+	writel(0, padding->reg + PADDING_V_REG);
+	writel(0, padding->reg + PADDING_COLOR_REG);
+}
+
+void mtk_padding_stop(struct device *dev)
+{
+	struct mtk_padding *padding = dev_get_drvdata(dev);
+
+	writel(0, padding->reg + PADDING_CONTROL_REG);
+}
+
+static int mtk_padding_bind(struct device *dev, struct device *master, void *data)
+{
+	return 0;
+}
+
+static void mtk_padding_unbind(struct device *dev, struct device *master, void *data)
+{
+}
+
+static const struct component_ops mtk_padding_component_ops = {
+	.bind	= mtk_padding_bind,
+	.unbind = mtk_padding_unbind,
+};
+
+static int mtk_padding_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mtk_padding *priv;
+	struct resource *res;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		dev_err(dev, "failed to get clk\n");
+		return PTR_ERR(priv->clk);
+	}
+
+	priv->reg = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(priv->reg)) {
+		dev_err(dev, "failed to do ioremap\n");
+		return PTR_ERR(priv->reg);
+	}
+
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+	ret = cmdq_dev_get_client_reg(dev, &priv->cmdq_reg, 0);
+	if (ret) {
+		dev_err(dev, "failed to get gce client reg\n");
+		return ret;
+	}
+#endif
+
+	platform_set_drvdata(pdev, priv);
+
+	ret = devm_pm_runtime_enable(dev);
+	if (ret)
+		return ret;
+
+	ret = component_add(dev, &mtk_padding_component_ops);
+	if (ret) {
+		pm_runtime_disable(dev);
+		return dev_err_probe(dev, ret, "failed to add component\n");
+	}
+
+	return 0;
+}
+
+static int mtk_padding_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &mtk_padding_component_ops);
+	return 0;
+}
+
+static const struct of_device_id mtk_padding_driver_dt_match[] = {
+	{ .compatible = "mediatek,mt8188-disp-padding" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mtk_padding_driver_dt_match);
+
+struct platform_driver mtk_padding_driver = {
+	.probe		= mtk_padding_probe,
+	.remove		= mtk_padding_remove,
+	.driver		= {
+		.name	= "mediatek-disp-padding",
+		.owner	= THIS_MODULE,
+		.of_match_table = mtk_padding_driver_dt_match,
+	},
+};
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 6309a857ca31..f202f26adab2 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -6,6 +6,7 @@ config DRM_MSM
 	depends on ARCH_QCOM || SOC_IMX5 || COMPILE_TEST
 	depends on COMMON_CLK
 	depends on IOMMU_SUPPORT
+	depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n
 	depends on QCOM_OCMEM || QCOM_OCMEM=n
 	depends on QCOM_LLCC || QCOM_LLCC=n
 	depends on QCOM_COMMAND_DB || QCOM_COMMAND_DB=n
@@ -16,6 +17,7 @@ config DRM_MSM
 	select DRM_DP_AUX_BUS
 	select DRM_DISPLAY_DP_HELPER
 	select DRM_DISPLAY_HELPER
+	select DRM_EXEC
 	select DRM_KMS_HELPER
 	select DRM_PANEL
 	select DRM_BRIDGE
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 49671364fdcf..b1173128b5b9 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -63,6 +63,7 @@ msm-$(CONFIG_DRM_MSM_DPU) += \
 	disp/dpu1/dpu_encoder_phys_wb.o \
 	disp/dpu1/dpu_formats.o \
 	disp/dpu1/dpu_hw_catalog.o \
+	disp/dpu1/dpu_hw_cdm.o \
 	disp/dpu1/dpu_hw_ctl.o \
 	disp/dpu1/dpu_hw_dsc.o \
 	disp/dpu1/dpu_hw_dsc_1_2.o \
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index e5916c106796..c003f970189b 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -684,7 +684,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
-	u32 regbit;
+	u32 hbb;
 	int ret;
 
 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
@@ -820,18 +820,15 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 
 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
 
-	/* Set the highest bank bit */
-	if (adreno_is_a540(adreno_gpu) || adreno_is_a530(adreno_gpu))
-		regbit = 2;
-	else
-		regbit = 1;
+	BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13);
+	hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13;
 
-	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, regbit << 7);
-	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, regbit << 1);
+	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, hbb << 7);
+	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, hbb << 1);
 
 	if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
 	    adreno_is_a540(adreno_gpu))
-		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, regbit);
+		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, hbb);
 
 	/* Disable All flat shading optimization (ALLFLATOPTDIS) */
 	gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10));
@@ -1785,5 +1782,11 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
 	/* Set up the preemption specific bits and pieces for each ringbuffer */
 	a5xx_preempt_init(gpu);
 
+	/* Set the highest bank bit */
+	if (adreno_is_a540(adreno_gpu) || adreno_is_a530(adreno_gpu))
+		adreno_gpu->ubwc_config.highest_bank_bit = 15;
+	else
+		adreno_gpu->ubwc_config.highest_bank_bit = 14;
+
 	return gpu;
 }
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 7a0220d29a23..c0bc924cd302 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1270,87 +1270,92 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]);
 }
 
-static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
+static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
 {
-	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	/* Unknown, introduced with A650 family, related to UBWC mode/ver 4 */
-	u32 rgb565_predicator = 0;
+	gpu->ubwc_config.rgb565_predicator = 0;
 	/* Unknown, introduced with A650 family */
-	u32 uavflagprd_inv = 0;
+	gpu->ubwc_config.uavflagprd_inv = 0;
 	/* Whether the minimum access length is 64 bits */
-	u32 min_acc_len = 0;
+	gpu->ubwc_config.min_acc_len = 0;
 	/* Entirely magic, per-GPU-gen value */
-	u32 ubwc_mode = 0;
+	gpu->ubwc_config.ubwc_mode = 0;
 	/*
 	 * The Highest Bank Bit value represents the bit of the highest DDR bank.
-	 * We then subtract 13 from it (13 is the minimum value allowed by hw) and
-	 * write the lowest two bits of the remaining value as hbb_lo and the
-	 * one above it as hbb_hi to the hardware. This should ideally use DRAM
-	 * type detection.
+	 * This should ideally use DRAM type detection.
 	 */
-	u32 hbb_hi = 0;
-	u32 hbb_lo = 2;
-	/* Unknown, introduced with A640/680 */
-	u32 amsbc = 0;
+	gpu->ubwc_config.highest_bank_bit = 15;
 
-	if (adreno_is_a610(adreno_gpu)) {
-		/* HBB = 14 */
-		hbb_lo = 1;
-		min_acc_len = 1;
-		ubwc_mode = 1;
+	if (adreno_is_a610(gpu)) {
+		gpu->ubwc_config.highest_bank_bit = 14;
+		gpu->ubwc_config.min_acc_len = 1;
+		gpu->ubwc_config.ubwc_mode = 1;
 	}
 
 	/* a618 is using the hw default values */
-	if (adreno_is_a618(adreno_gpu))
+	if (adreno_is_a618(gpu))
 		return;
 
-	if (adreno_is_a619_holi(adreno_gpu))
-		hbb_lo = 0;
+	if (adreno_is_a619_holi(gpu))
+		gpu->ubwc_config.highest_bank_bit = 13;
 
-	if (adreno_is_a640_family(adreno_gpu))
-		amsbc = 1;
+	if (adreno_is_a640_family(gpu))
+		gpu->ubwc_config.amsbc = 1;
 
-	if (adreno_is_a650(adreno_gpu) ||
-	    adreno_is_a660(adreno_gpu) ||
-	    adreno_is_a730(adreno_gpu) ||
-	    adreno_is_a740_family(adreno_gpu)) {
+	if (adreno_is_a650(gpu) ||
+	    adreno_is_a660(gpu) ||
+	    adreno_is_a690(gpu) ||
+	    adreno_is_a730(gpu) ||
+	    adreno_is_a740_family(gpu)) {
 		/* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
-		hbb_lo = 3;
-		amsbc = 1;
-		rgb565_predicator = 1;
-		uavflagprd_inv = 2;
+		gpu->ubwc_config.highest_bank_bit = 16;
+		gpu->ubwc_config.amsbc = 1;
+		gpu->ubwc_config.rgb565_predicator = 1;
+		gpu->ubwc_config.uavflagprd_inv = 2;
 	}
 
-	if (adreno_is_a690(adreno_gpu)) {
-		hbb_lo = 2;
-		amsbc = 1;
-		rgb565_predicator = 1;
-		uavflagprd_inv = 2;
+	if (adreno_is_7c3(gpu)) {
+		gpu->ubwc_config.highest_bank_bit = 14;
+		gpu->ubwc_config.amsbc = 1;
+		gpu->ubwc_config.rgb565_predicator = 1;
+		gpu->ubwc_config.uavflagprd_inv = 2;
 	}
+}
 
-	if (adreno_is_7c3(adreno_gpu)) {
-		hbb_lo = 1;
-		amsbc = 1;
-		rgb565_predicator = 1;
-		uavflagprd_inv = 2;
-	}
+static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	/*
+	 * We subtract 13 from the highest bank bit (13 is the minimum value
+	 * allowed by hw) and write the lowest two bits of the remaining value
+	 * as hbb_lo and the one above it as hbb_hi to the hardware.
+	 */
+	BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13);
+	u32 hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13;
+	u32 hbb_hi = hbb >> 2;
+	u32 hbb_lo = hbb & 3;
 
 	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
-		  rgb565_predicator << 11 | hbb_hi << 10 | amsbc << 4 |
-		  min_acc_len << 3 | hbb_lo << 1 | ubwc_mode);
+		  adreno_gpu->ubwc_config.rgb565_predicator << 11 |
+		  hbb_hi << 10 | adreno_gpu->ubwc_config.amsbc << 4 |
+		  adreno_gpu->ubwc_config.min_acc_len << 3 |
+		  hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode);
 
 	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, hbb_hi << 4 |
-		  min_acc_len << 3 | hbb_lo << 1 | ubwc_mode);
+		  adreno_gpu->ubwc_config.min_acc_len << 3 |
+		  hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode);
 
 	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, hbb_hi << 10 |
-		  uavflagprd_inv << 4 | min_acc_len << 3 |
-		  hbb_lo << 1 | ubwc_mode);
+		  adreno_gpu->ubwc_config.uavflagprd_inv << 4 |
+		  adreno_gpu->ubwc_config.min_acc_len << 3 |
+		  hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode);
 
 	if (adreno_is_a7xx(adreno_gpu))
 		gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL,
 			  FIELD_PREP(GENMASK(8, 5), hbb_lo));
 
-	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, min_acc_len << 23 | hbb_lo << 21);
+	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
+		  adreno_gpu->ubwc_config.min_acc_len << 23 | hbb_lo << 21);
 }
 
 static int a6xx_cp_init(struct msm_gpu *gpu)
@@ -1741,7 +1746,9 @@ static int hw_init(struct msm_gpu *gpu)
 	/* Setting the primFifo thresholds default values,
 	 * and vccCacheSkipDis=1 bit (0x200) for A640 and newer
 	*/
-	if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu) || adreno_is_a690(adreno_gpu))
+	if (adreno_is_a690(adreno_gpu))
+		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00800200);
+	else if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu))
 		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200);
 	else if (adreno_is_a640_family(adreno_gpu) || adreno_is_7c3(adreno_gpu))
 		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200);
@@ -1775,6 +1782,8 @@ static int hw_init(struct msm_gpu *gpu)
 	if (adreno_is_a730(adreno_gpu) ||
 	    adreno_is_a740_family(adreno_gpu))
 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff);
+	else if (adreno_is_a690(adreno_gpu))
+		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff);
 	else if (adreno_is_a619(adreno_gpu))
 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff);
 	else if (adreno_is_a610(adreno_gpu))
@@ -1782,7 +1791,7 @@ static int hw_init(struct msm_gpu *gpu)
 	else
 		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff);
 
-	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1);
+	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1);
 
 	/* Set weights for bicubic filtering */
 	if (adreno_is_a650_family(adreno_gpu)) {
@@ -1808,12 +1817,17 @@ static int hw_init(struct msm_gpu *gpu)
 	a6xx_set_cp_protect(gpu);
 
 	if (adreno_is_a660_family(adreno_gpu)) {
-		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
+		if (adreno_is_a690(adreno_gpu))
+			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801);
+		else
+			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
 		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
 	}
 
+	if (adreno_is_a690(adreno_gpu))
+		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90);
 	/* Set dualQ + disable afull for A660 GPU */
-	if (adreno_is_a660(adreno_gpu))
+	else if (adreno_is_a660(adreno_gpu))
 		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
 	else if (adreno_is_a7xx(adreno_gpu))
 		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG,
@@ -2908,5 +2922,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
 				a6xx_fault_handler);
 
+	a6xx_calc_ubwc_config(adreno_gpu);
+
 	return gpu;
 }
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index f62ab5257e66..2ce7d7b1690d 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -464,7 +464,7 @@ static const struct adreno_info gpulist[] = {
 			{ 190, 1 },
 		),
 	}, {
-		.chip_ids = ADRENO_CHIP_IDS(0x06080000),
+		.chip_ids = ADRENO_CHIP_IDS(0x06080001),
 		.family = ADRENO_6XX_GEN2,
 		.revn = 680,
 		.fw = {
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 3fe9fd240cc7..074fb498706f 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -373,6 +373,9 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
 			return -EINVAL;
 		*value = ctx->aspace->va_size;
 		return 0;
+	case MSM_PARAM_HIGHEST_BANK_BIT:
+		*value = adreno_gpu->ubwc_config.highest_bank_bit;
+		return 0;
 	default:
 		DBG("%s: invalid param: %u", gpu->name, param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 80b3f6312116..bc14df96feb0 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -165,6 +165,15 @@ struct adreno_gpu {
 	/* firmware: */
 	const struct firmware *fw[ADRENO_FW_MAX];
 
+	struct {
+		u32 rgb565_predicator;
+		u32 uavflagprd_inv;
+		u32 min_acc_len;
+		u32 ubwc_mode;
+		u32 highest_bank_bit;
+		u32 amsbc;
+	} ubwc_config;
+
 	/*
 	 * Register offsets are different between some GPUs.
 	 * GPU specific offsets will be exported by GPU specific
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h
new file mode 100644
index 000000000000..eb5dfff2ec4f
--- /dev/null
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h
@@ -0,0 +1,457 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022. Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2015-2018, 2020 The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DPU_10_0_SM8650_H
+#define _DPU_10_0_SM8650_H
+
+static const struct dpu_caps sm8650_dpu_caps = {
+	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
+	.max_mixer_blendstages = 0xb,
+	.has_src_split = true,
+	.has_dim_layer = true,
+	.has_idle_pc = true,
+	.has_3d_merge = true,
+	.max_linewidth = 8192,
+	.pixel_ram_size = DEFAULT_PIXEL_RAM_SIZE,
+};
+
+static const struct dpu_mdp_cfg sm8650_mdp = {
+	.name = "top_0",
+	.base = 0, .len = 0x494,
+	.features = BIT(DPU_MDP_PERIPH_0_REMOVED),
+	.clk_ctrls = {
+		[DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 },
+	},
+};
+
+/* FIXME: get rid of DPU_CTL_SPLIT_DISPLAY in favour of proper ACTIVE_CTL support */
+static const struct dpu_ctl_cfg sm8650_ctl[] = {
+	{
+		.name = "ctl_0", .id = CTL_0,
+		.base = 0x15000, .len = 0x1000,
+		.features = CTL_SM8550_MASK | BIT(DPU_CTL_SPLIT_DISPLAY),
+		.intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 9),
+	}, {
+		.name = "ctl_1", .id = CTL_1,
+		.base = 0x16000, .len = 0x1000,
+		.features = CTL_SM8550_MASK | BIT(DPU_CTL_SPLIT_DISPLAY),
+		.intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 10),
+	}, {
+		.name = "ctl_2", .id = CTL_2,
+		.base = 0x17000, .len = 0x1000,
+		.features = CTL_SM8550_MASK,
+		.intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 11),
+	}, {
+		.name = "ctl_3", .id = CTL_3,
+		.base = 0x18000, .len = 0x1000,
+		.features = CTL_SM8550_MASK,
+		.intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 12),
+	}, {
+		.name = "ctl_4", .id = CTL_4,
+		.base = 0x19000, .len = 0x1000,
+		.features = CTL_SM8550_MASK,
+		.intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 13),
+	}, {
+		.name = "ctl_5", .id = CTL_5,
+		.base = 0x1a000, .len = 0x1000,
+		.features = CTL_SM8550_MASK,
+		.intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 23),
+	},
+};
+
+static const struct dpu_sspp_cfg sm8650_sspp[] = {
+	{
+		.name = "sspp_0", .id = SSPP_VIG0,
+		.base = 0x4000, .len = 0x344,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_3,
+		.xin_id = 0,
+		.type = SSPP_TYPE_VIG,
+	}, {
+		.name = "sspp_1", .id = SSPP_VIG1,
+		.base = 0x6000, .len = 0x344,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_3,
+		.xin_id = 4,
+		.type = SSPP_TYPE_VIG,
+	}, {
+		.name = "sspp_2", .id = SSPP_VIG2,
+		.base = 0x8000, .len = 0x344,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_3,
+		.xin_id = 8,
+		.type = SSPP_TYPE_VIG,
+	}, {
+		.name = "sspp_3", .id = SSPP_VIG3,
+		.base = 0xa000, .len = 0x344,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_3,
+		.xin_id = 12,
+		.type = SSPP_TYPE_VIG,
+	}, {
+		.name = "sspp_8", .id = SSPP_DMA0,
+		.base = 0x24000, .len = 0x344,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 1,
+		.type = SSPP_TYPE_DMA,
+	}, {
+		.name = "sspp_9", .id = SSPP_DMA1,
+		.base = 0x26000, .len = 0x344,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 5,
+		.type = SSPP_TYPE_DMA,
+	}, {
+		.name = "sspp_10", .id = SSPP_DMA2,
+		.base = 0x28000, .len = 0x344,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 9,
+		.type = SSPP_TYPE_DMA,
+	}, {
+		.name = "sspp_11", .id = SSPP_DMA3,
+		.base = 0x2a000, .len = 0x344,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 13,
+		.type = SSPP_TYPE_DMA,
+	}, {
+		.name = "sspp_12", .id = SSPP_DMA4,
+		.base = 0x2c000, .len = 0x344,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 14,
+		.type = SSPP_TYPE_DMA,
+	}, {
+		.name = "sspp_13", .id = SSPP_DMA5,
+		.base = 0x2e000, .len = 0x344,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 15,
+		.type = SSPP_TYPE_DMA,
+	},
+};
+
+static const struct dpu_lm_cfg sm8650_lm[] = {
+	{
+		.name = "lm_0", .id = LM_0,
+		.base = 0x44000, .len = 0x400,
+		.features = MIXER_SDM845_MASK,
+		.sblk = &sdm845_lm_sblk,
+		.lm_pair = LM_1,
+		.pingpong = PINGPONG_0,
+		.dspp = DSPP_0,
+	}, {
+		.name = "lm_1", .id = LM_1,
+		.base = 0x45000, .len = 0x400,
+		.features = MIXER_SDM845_MASK,
+		.sblk = &sdm845_lm_sblk,
+		.lm_pair = LM_0,
+		.pingpong = PINGPONG_1,
+		.dspp = DSPP_1,
+	}, {
+		.name = "lm_2", .id = LM_2,
+		.base = 0x46000, .len = 0x400,
+		.features = MIXER_SDM845_MASK,
+		.sblk = &sdm845_lm_sblk,
+		.lm_pair = LM_3,
+		.pingpong = PINGPONG_2,
+	}, {
+		.name = "lm_3", .id = LM_3,
+		.base = 0x47000, .len = 0x400,
+		.features = MIXER_SDM845_MASK,
+		.sblk = &sdm845_lm_sblk,
+		.lm_pair = LM_2,
+		.pingpong = PINGPONG_3,
+	}, {
+		.name = "lm_4", .id = LM_4,
+		.base = 0x48000, .len = 0x400,
+		.features = MIXER_SDM845_MASK,
+		.sblk = &sdm845_lm_sblk,
+		.lm_pair = LM_5,
+		.pingpong = PINGPONG_4,
+	}, {
+		.name = "lm_5", .id = LM_5,
+		.base = 0x49000, .len = 0x400,
+		.features = MIXER_SDM845_MASK,
+		.sblk = &sdm845_lm_sblk,
+		.lm_pair = LM_4,
+		.pingpong = PINGPONG_5,
+	},
+};
+
+static const struct dpu_dspp_cfg sm8650_dspp[] = {
+	{
+		.name = "dspp_0", .id = DSPP_0,
+		.base = 0x54000, .len = 0x1800,
+		.features = DSPP_SC7180_MASK,
+		.sblk = &sdm845_dspp_sblk,
+	}, {
+		.name = "dspp_1", .id = DSPP_1,
+		.base = 0x56000, .len = 0x1800,
+		.features = DSPP_SC7180_MASK,
+		.sblk = &sdm845_dspp_sblk,
+	}, {
+		.name = "dspp_2", .id = DSPP_2,
+		.base = 0x58000, .len = 0x1800,
+		.features = DSPP_SC7180_MASK,
+		.sblk = &sdm845_dspp_sblk,
+	}, {
+		.name = "dspp_3", .id = DSPP_3,
+		.base = 0x5a000, .len = 0x1800,
+		.features = DSPP_SC7180_MASK,
+		.sblk = &sdm845_dspp_sblk,
+	},
+};
+
+static const struct dpu_pingpong_cfg sm8650_pp[] = {
+	{
+		.name = "pingpong_0", .id = PINGPONG_0,
+		.base = 0x69000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_0,
+		.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8),
+	}, {
+		.name = "pingpong_1", .id = PINGPONG_1,
+		.base = 0x6a000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_0,
+		.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9),
+	}, {
+		.name = "pingpong_2", .id = PINGPONG_2,
+		.base = 0x6b000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_1,
+		.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 10),
+	}, {
+		.name = "pingpong_3", .id = PINGPONG_3,
+		.base = 0x6c000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_1,
+		.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 11),
+	}, {
+		.name = "pingpong_4", .id = PINGPONG_4,
+		.base = 0x6d000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_2,
+		.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 30),
+	}, {
+		.name = "pingpong_5", .id = PINGPONG_5,
+		.base = 0x6e000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_2,
+		.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31),
+	}, {
+		.name = "pingpong_6", .id = PINGPONG_6,
+		.base = 0x66000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_3,
+	}, {
+		.name = "pingpong_7", .id = PINGPONG_7,
+		.base = 0x66400, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_3,
+	}, {
+		.name = "pingpong_8", .id = PINGPONG_8,
+		.base = 0x7e000, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_4,
+	}, {
+		.name = "pingpong_9", .id = PINGPONG_9,
+		.base = 0x7e400, .len = 0,
+		.features = BIT(DPU_PINGPONG_DITHER),
+		.sblk = &sc7280_pp_sblk,
+		.merge_3d = MERGE_3D_4,
+	},
+};
+
+static const struct dpu_merge_3d_cfg sm8650_merge_3d[] = {
+	{
+		.name = "merge_3d_0", .id = MERGE_3D_0,
+		.base = 0x4e000, .len = 0x8,
+	}, {
+		.name = "merge_3d_1", .id = MERGE_3D_1,
+		.base = 0x4f000, .len = 0x8,
+	}, {
+		.name = "merge_3d_2", .id = MERGE_3D_2,
+		.base = 0x50000, .len = 0x8,
+	}, {
+		.name = "merge_3d_3", .id = MERGE_3D_3,
+		.base = 0x66700, .len = 0x8,
+	}, {
+		.name = "merge_3d_4", .id = MERGE_3D_4,
+		.base = 0x7e700, .len = 0x8,
+	},
+};
+
+/*
+ * NOTE: Each display compression engine (DCE) contains dual hard
+ * slice DSC encoders so both share same base address but with
+ * its own different sub block address.
+ */
+static const struct dpu_dsc_cfg sm8650_dsc[] = {
+	{
+		.name = "dce_0_0", .id = DSC_0,
+		.base = 0x80000, .len = 0x6,
+		.features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN),
+		.sblk = &dsc_sblk_0,
+	}, {
+		.name = "dce_0_1", .id = DSC_1,
+		.base = 0x80000, .len = 0x6,
+		.features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN),
+		.sblk = &dsc_sblk_1,
+	}, {
+		.name = "dce_1_0", .id = DSC_2,
+		.base = 0x81000, .len = 0x6,
+		.features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN),
+		.sblk = &dsc_sblk_0,
+	}, {
+		.name = "dce_1_1", .id = DSC_3,
+		.base = 0x81000, .len = 0x6,
+		.features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN),
+		.sblk = &dsc_sblk_1,
+	}, {
+		.name = "dce_2_0", .id = DSC_4,
+		.base = 0x82000, .len = 0x6,
+		.features = BIT(DPU_DSC_HW_REV_1_2),
+		.sblk = &dsc_sblk_0,
+	}, {
+		.name = "dce_2_1", .id = DSC_5,
+		.base = 0x82000, .len = 0x6,
+		.features = BIT(DPU_DSC_HW_REV_1_2),
+		.sblk = &dsc_sblk_1,
+	},
+};
+
+static const struct dpu_wb_cfg sm8650_wb[] = {
+	{
+		.name = "wb_2", .id = WB_2,
+		.base = 0x65000, .len = 0x2c8,
+		.features = WB_SM8250_MASK,
+		.format_list = wb2_formats_rgb,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb),
+		.xin_id = 6,
+		.vbif_idx = VBIF_RT,
+		.maxlinewidth = 4096,
+		.intr_wb_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 4),
+	},
+};
+
+static const struct dpu_intf_cfg sm8650_intf[] = {
+	{
+		.name = "intf_0", .id = INTF_0,
+		.base = 0x34000, .len = 0x280,
+		.features = INTF_SC7280_MASK,
+		.type = INTF_DP,
+		.controller_id = MSM_DP_CONTROLLER_0,
+		.prog_fetch_lines_worst_case = 24,
+		.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24),
+		.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25),
+	}, {
+		.name = "intf_1", .id = INTF_1,
+		.base = 0x35000, .len = 0x300,
+		.features = INTF_SC7280_MASK,
+		.type = INTF_DSI,
+		.controller_id = MSM_DSI_CONTROLLER_0,
+		.prog_fetch_lines_worst_case = 24,
+		.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26),
+		.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27),
+		.intr_tear_rd_ptr = DPU_IRQ_IDX(MDP_INTF1_TEAR_INTR, 2),
+	}, {
+		.name = "intf_2", .id = INTF_2,
+		.base = 0x36000, .len = 0x300,
+		.features = INTF_SC7280_MASK,
+		.type = INTF_DSI,
+		.controller_id = MSM_DSI_CONTROLLER_1,
+		.prog_fetch_lines_worst_case = 24,
+		.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28),
+		.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29),
+		.intr_tear_rd_ptr = DPU_IRQ_IDX(MDP_INTF2_TEAR_INTR, 2),
+	}, {
+		.name = "intf_3", .id = INTF_3,
+		.base = 0x37000, .len = 0x280,
+		.features = INTF_SC7280_MASK,
+		.type = INTF_DP,
+		.controller_id = MSM_DP_CONTROLLER_1,
+		.prog_fetch_lines_worst_case = 24,
+		.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30),
+		.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31),
+	},
+};
+
+static const struct dpu_perf_cfg sm8650_perf_data = {
+	.max_bw_low = 17000000,
+	.max_bw_high = 27000000,
+	.min_core_ib = 2500000,
+	.min_llcc_ib = 0,
+	.min_dram_ib = 800000,
+	.min_prefill_lines = 35,
+	/* FIXME: lut tables */
+	.danger_lut_tbl = {0x3ffff, 0x3ffff, 0x0},
+	.safe_lut_tbl = {0xfe00, 0xfe00, 0xffff},
+	.qos_lut_tbl = {
+		{.nentry = ARRAY_SIZE(sc7180_qos_linear),
+		.entries = sc7180_qos_linear
+		},
+		{.nentry = ARRAY_SIZE(sc7180_qos_macrotile),
+		.entries = sc7180_qos_macrotile
+		},
+		{.nentry = ARRAY_SIZE(sc7180_qos_nrt),
+		.entries = sc7180_qos_nrt
+		},
+		/* TODO: macrotile-qseed is different from macrotile */
+	},
+	.cdp_cfg = {
+		{.rd_enable = 1, .wr_enable = 1},
+		{.rd_enable = 1, .wr_enable = 0}
+	},
+	.clk_inefficiency_factor = 105,
+	.bw_inefficiency_factor = 120,
+};
+
+static const struct dpu_mdss_version sm8650_mdss_ver = {
+	.core_major_ver = 10,
+	.core_minor_ver = 0,
+};
+
+const struct dpu_mdss_cfg dpu_sm8650_cfg = {
+	.mdss_ver = &sm8650_mdss_ver,
+	.caps = &sm8650_dpu_caps,
+	.mdp = &sm8650_mdp,
+	.ctl_count = ARRAY_SIZE(sm8650_ctl),
+	.ctl = sm8650_ctl,
+	.sspp_count = ARRAY_SIZE(sm8650_sspp),
+	.sspp = sm8650_sspp,
+	.mixer_count = ARRAY_SIZE(sm8650_lm),
+	.mixer = sm8650_lm,
+	.dspp_count = ARRAY_SIZE(sm8650_dspp),
+	.dspp = sm8650_dspp,
+	.pingpong_count = ARRAY_SIZE(sm8650_pp),
+	.pingpong = sm8650_pp,
+	.dsc_count = ARRAY_SIZE(sm8650_dsc),
+	.dsc = sm8650_dsc,
+	.merge_3d_count = ARRAY_SIZE(sm8650_merge_3d),
+	.merge_3d = sm8650_merge_3d,
+	.wb_count = ARRAY_SIZE(sm8650_wb),
+	.wb = sm8650_wb,
+	.intf_count = ARRAY_SIZE(sm8650_intf),
+	.intf = sm8650_intf,
+	.vbif_count = ARRAY_SIZE(sm8650_vbif),
+	.vbif = sm8650_vbif,
+	.perf = &sm8650_perf_data,
+};
+
+#endif
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h
index aa1867943c9f..1d3e9666c741 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps msm8998_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0x7,
-	.qseed_type = DPU_SSPP_SCALER_QSEED3,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
@@ -70,7 +69,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1ac,
 		.features = VIG_MSM8998_MASK,
-		.sblk = &msm8998_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_1_2,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -78,7 +77,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1ac,
 		.features = VIG_MSM8998_MASK,
-		.sblk = &msm8998_vig_sblk_1,
+		.sblk = &dpu_vig_sblk_qseed3_1_2,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -86,7 +85,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1ac,
 		.features = VIG_MSM8998_MASK,
-		.sblk = &msm8998_vig_sblk_2,
+		.sblk = &dpu_vig_sblk_qseed3_1_2,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -94,7 +93,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1ac,
 		.features = VIG_MSM8998_MASK,
-		.sblk = &msm8998_vig_sblk_3,
+		.sblk = &dpu_vig_sblk_qseed3_1_2,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -102,7 +101,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1ac,
 		.features = DMA_MSM8998_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -110,7 +109,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1ac,
 		.features = DMA_MSM8998_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -118,7 +117,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1ac,
 		.features = DMA_CURSOR_MSM8998_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -126,7 +125,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x1ac,
 		.features = DMA_CURSOR_MSM8998_MASK,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h
index 38ac0c1a134b..7a23389a5732 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sdm845_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED3,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
@@ -68,7 +67,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1c8,
 		.features = VIG_SDM845_MASK_SDMA,
-		.sblk = &sdm845_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_1_3,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -76,7 +75,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1c8,
 		.features = VIG_SDM845_MASK_SDMA,
-		.sblk = &sdm845_vig_sblk_1,
+		.sblk = &dpu_vig_sblk_qseed3_1_3,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -84,7 +83,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1c8,
 		.features = VIG_SDM845_MASK_SDMA,
-		.sblk = &sdm845_vig_sblk_2,
+		.sblk = &dpu_vig_sblk_qseed3_1_3,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -92,7 +91,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1c8,
 		.features = VIG_SDM845_MASK_SDMA,
-		.sblk = &sdm845_vig_sblk_3,
+		.sblk = &dpu_vig_sblk_qseed3_1_3,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -100,7 +99,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1c8,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -108,7 +107,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1c8,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -116,7 +115,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1c8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -124,7 +123,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x1c8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h
new file mode 100644
index 000000000000..cbbdaebe357e
--- /dev/null
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022. Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2023, Richard Acayan. All rights reserved.
+ */
+
+#ifndef _DPU_4_1_SDM670_H
+#define _DPU_4_1_SDM670_H
+
+static const struct dpu_mdp_cfg sdm670_mdp = {
+	.name = "top_0",
+	.base = 0x0, .len = 0x45c,
+	.features = BIT(DPU_MDP_AUDIO_SELECT),
+	.clk_ctrls = {
+		[DPU_CLK_CTRL_VIG0] = { .reg_off = 0x2ac, .bit_off = 0 },
+		[DPU_CLK_CTRL_VIG1] = { .reg_off = 0x2b4, .bit_off = 0 },
+		[DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 },
+		[DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 },
+		[DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 },
+	},
+};
+
+static const struct dpu_sspp_cfg sdm670_sspp[] = {
+	{
+		.name = "sspp_0", .id = SSPP_VIG0,
+		.base = 0x4000, .len = 0x1c8,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_1_3,
+		.xin_id = 0,
+		.type = SSPP_TYPE_VIG,
+		.clk_ctrl = DPU_CLK_CTRL_VIG0,
+	}, {
+		.name = "sspp_1", .id = SSPP_VIG1,
+		.base = 0x6000, .len = 0x1c8,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_1_3,
+		.xin_id = 4,
+		.type = SSPP_TYPE_VIG,
+		.clk_ctrl = DPU_CLK_CTRL_VIG0,
+	}, {
+		.name = "sspp_8", .id = SSPP_DMA0,
+		.base = 0x24000, .len = 0x1c8,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 1,
+		.type = SSPP_TYPE_DMA,
+		.clk_ctrl = DPU_CLK_CTRL_DMA0,
+	}, {
+		.name = "sspp_9", .id = SSPP_DMA1,
+		.base = 0x26000, .len = 0x1c8,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 5,
+		.type = SSPP_TYPE_DMA,
+		.clk_ctrl = DPU_CLK_CTRL_DMA1,
+	}, {
+		.name = "sspp_10", .id = SSPP_DMA2,
+		.base = 0x28000, .len = 0x1c8,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
+		.xin_id = 9,
+		.type = SSPP_TYPE_DMA,
+		.clk_ctrl = DPU_CLK_CTRL_DMA2,
+	},
+};
+
+static const struct dpu_dsc_cfg sdm670_dsc[] = {
+	{
+		.name = "dsc_0", .id = DSC_0,
+		.base = 0x80000, .len = 0x140,
+	}, {
+		.name = "dsc_1", .id = DSC_1,
+		.base = 0x80400, .len = 0x140,
+	},
+};
+
+static const struct dpu_mdss_version sdm670_mdss_ver = {
+	.core_major_ver = 4,
+	.core_minor_ver = 1,
+};
+
+const struct dpu_mdss_cfg dpu_sdm670_cfg = {
+	.mdss_ver = &sdm670_mdss_ver,
+	.caps = &sdm845_dpu_caps,
+	.mdp = &sdm670_mdp,
+	.ctl_count = ARRAY_SIZE(sdm845_ctl),
+	.ctl = sdm845_ctl,
+	.sspp_count = ARRAY_SIZE(sdm670_sspp),
+	.sspp = sdm670_sspp,
+	.mixer_count = ARRAY_SIZE(sdm845_lm),
+	.mixer = sdm845_lm,
+	.pingpong_count = ARRAY_SIZE(sdm845_pp),
+	.pingpong = sdm845_pp,
+	.dsc_count = ARRAY_SIZE(sdm670_dsc),
+	.dsc = sdm670_dsc,
+	.intf_count = ARRAY_SIZE(sdm845_intf),
+	.intf = sdm845_intf,
+	.vbif_count = ARRAY_SIZE(sdm845_vbif),
+	.vbif = sdm845_vbif,
+	.perf = &sdm845_perf_data,
+};
+
+#endif
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
index 9392ad2b4d3f..145f3d5953a3 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sm8150_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED3,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
@@ -77,7 +76,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -85,7 +84,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_1,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -93,7 +92,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_2,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -101,7 +100,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_3,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -109,7 +108,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f0,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -117,7 +116,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f0,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -125,7 +124,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f0,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -133,7 +132,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x1f0,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
index e07f4c8c25b9..9e3bec8bc121 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sc8180x_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED3,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
@@ -76,7 +75,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -84,7 +83,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_1,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
@@ -92,7 +91,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_2,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
@@ -100,7 +99,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1f0,
 		.features = VIG_SDM845_MASK,
-		.sblk = &sdm845_vig_sblk_3,
+		.sblk = &dpu_vig_sblk_qseed3_1_4,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -108,7 +107,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f0,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -116,7 +115,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f0,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -124,7 +123,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f0,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -132,7 +131,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x1f0,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
@@ -367,6 +366,7 @@ static const struct dpu_perf_cfg sc8180x_perf_data = {
 	.min_llcc_ib = 800000,
 	.min_dram_ib = 800000,
 	.danger_lut_tbl = {0xf, 0xffff, 0x0},
+	.safe_lut_tbl = {0xfff0, 0xf000, 0xffff},
 	.qos_lut_tbl = {
 		{.nentry = ARRAY_SIZE(sc7180_qos_linear),
 		.entries = sc7180_qos_linear
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h
index cec7af6667dc..76b2ec0d2489 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h
@@ -68,8 +68,8 @@ static const struct dpu_sspp_cfg sm6125_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f0,
-		.features = VIG_SM6125_MASK,
-		.sblk = &sm6125_vig_sblk_0,
+		.features = VIG_SDM845_MASK,
+		.sblk = &dpu_vig_sblk_qseed3_2_4,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -77,7 +77,7 @@ static const struct dpu_sspp_cfg sm6125_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f0,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -85,7 +85,7 @@ static const struct dpu_sspp_cfg sm6125_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f0,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
index 94278a3e3483..a57d50b1f028 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sm8250_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
@@ -32,7 +31,7 @@ static const struct dpu_mdp_cfg sm8250_mdp = {
 		[DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 },
 		[DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 },
-		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x3b8, .bit_off = 24 },
+		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 },
 	},
 };
 
@@ -75,32 +74,32 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_0,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
 	}, {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_1,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
 	}, {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_2,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
 	}, {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK_SDMA,
-		.sblk = &sm8250_vig_sblk_3,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -108,7 +107,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -116,7 +115,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -124,7 +123,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -132,7 +131,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
@@ -337,8 +336,8 @@ static const struct dpu_wb_cfg sm8250_wb[] = {
 		.name = "wb_2", .id = WB_2,
 		.base = 0x65000, .len = 0x2c8,
 		.features = WB_SM8250_MASK,
-		.format_list = wb2_formats,
-		.num_formats = ARRAY_SIZE(wb2_formats),
+		.format_list = wb2_formats_rgb_yuv,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb_yuv),
 		.clk_ctrl = DPU_CLK_CTRL_WB2,
 		.xin_id = 6,
 		.vbif_idx = VBIF_RT,
@@ -385,6 +384,7 @@ const struct dpu_mdss_cfg dpu_sm8250_cfg = {
 	.mdss_ver = &sm8250_mdss_ver,
 	.caps = &sm8250_dpu_caps,
 	.mdp = &sm8250_mdp,
+	.cdm = &sc7280_cdm,
 	.ctl_count = ARRAY_SIZE(sm8250_ctl),
 	.ctl = sm8250_ctl,
 	.sspp_count = ARRAY_SIZE(sm8250_sspp),
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
index c0d88ddccb28..7382ebb6e5b2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sc7180_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0x9,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
 	.max_linewidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
@@ -25,7 +24,7 @@ static const struct dpu_mdp_cfg sc7180_mdp = {
 		[DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2c4, .bit_off = 8 },
-		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x3b8, .bit_off = 24 },
+		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 },
 	},
 };
 
@@ -52,8 +51,8 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sc7180_vig_sblk_0,
+		.features = VIG_SDM845_MASK,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -61,7 +60,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -69,7 +68,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -77,7 +76,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -158,8 +157,8 @@ static const struct dpu_wb_cfg sc7180_wb[] = {
 		.name = "wb_2", .id = WB_2,
 		.base = 0x65000, .len = 0x2c8,
 		.features = WB_SM8250_MASK,
-		.format_list = wb2_formats,
-		.num_formats = ARRAY_SIZE(wb2_formats),
+		.format_list = wb2_formats_rgb,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb),
 		.clk_ctrl = DPU_CLK_CTRL_WB2,
 		.xin_id = 6,
 		.vbif_idx = VBIF_RT,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
index 57ce14c18def..43f64a005f5a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sm6115_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_LINE_WIDTH,
 	.max_mixer_blendstages = 0x4,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
 	.max_linewidth = 2160,
@@ -39,8 +38,8 @@ static const struct dpu_sspp_cfg sm6115_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm6115_vig_sblk_0,
+		.features = VIG_SDM845_MASK,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -48,7 +47,7 @@ static const struct dpu_sspp_cfg sm6115_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h
index 62db84bd15f2..e17a30be7525 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h
@@ -11,7 +11,6 @@
 static const struct dpu_caps sm6350_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0x7,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
@@ -59,8 +58,8 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sc7180_vig_sblk_0,
+		.features = VIG_SDM845_MASK,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -68,7 +67,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -76,7 +75,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -84,7 +83,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h
index fb36fba5171c..3cbb2fe8aba2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h
@@ -39,7 +39,7 @@ static const struct dpu_sspp_cfg qcm2290_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
 		.features = VIG_QCM2290_MASK,
-		.sblk = &qcm2290_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_noscale,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -47,7 +47,7 @@ static const struct dpu_sspp_cfg qcm2290_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK,
-		.sblk = &qcm2290_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h
index 5a3aad364c78..a06c8634d2d7 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h
@@ -11,7 +11,6 @@
 static const struct dpu_caps sm6375_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_LINE_WIDTH,
 	.max_mixer_blendstages = 0x4,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
 	.max_linewidth = 2160,
@@ -40,8 +39,8 @@ static const struct dpu_sspp_cfg sm6375_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm6115_vig_sblk_0,
+		.features = VIG_SDM845_MASK,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -49,7 +48,7 @@ static const struct dpu_sspp_cfg sm6375_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
index 1709ba57f384..aced16e350da 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sm8350_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
@@ -31,6 +30,7 @@ static const struct dpu_mdp_cfg sm8350_mdp = {
 		[DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 },
+		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 },
 		[DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 },
 	},
 };
@@ -74,64 +74,64 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_0,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
 	}, {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_1,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
 	}, {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_2,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
 	}, {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x1f8,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_3,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
 	}, {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
-		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
 	}, {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f8,
-		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
 	}, {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f8,
-		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
 	}, {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x1f8,
-		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_3,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
@@ -298,6 +298,21 @@ static const struct dpu_dsc_cfg sm8350_dsc[] = {
 	},
 };
 
+static const struct dpu_wb_cfg sm8350_wb[] = {
+	{
+		.name = "wb_2", .id = WB_2,
+		.base = 0x65000, .len = 0x2c8,
+		.features = WB_SM8250_MASK,
+		.format_list = wb2_formats_rgb,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb),
+		.clk_ctrl = DPU_CLK_CTRL_WB2,
+		.xin_id = 6,
+		.vbif_idx = VBIF_RT,
+		.maxlinewidth = 4096,
+		.intr_wb_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 4),
+	},
+};
+
 static const struct dpu_intf_cfg sm8350_intf[] = {
 	{
 		.name = "intf_0", .id = INTF_0,
@@ -393,6 +408,8 @@ const struct dpu_mdss_cfg dpu_sm8350_cfg = {
 	.dsc = sm8350_dsc,
 	.merge_3d_count = ARRAY_SIZE(sm8350_merge_3d),
 	.merge_3d = sm8350_merge_3d,
+	.wb_count = ARRAY_SIZE(sm8350_wb),
+	.wb = sm8350_wb,
 	.intf_count = ARRAY_SIZE(sm8350_intf),
 	.intf = sm8350_intf,
 	.vbif_count = ARRAY_SIZE(sdm845_vbif),
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
index 15942fa5a8e0..2f153e0b5c6a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sc7280_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0x7,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
 	.max_linewidth = 2400,
@@ -25,7 +24,7 @@ static const struct dpu_mdp_cfg sc7280_mdp = {
 		[DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2c4, .bit_off = 8 },
-		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x3b8, .bit_off = 24 },
+		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 },
 	},
 };
 
@@ -58,7 +57,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = {
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x1f8,
 		.features = VIG_SC7280_MASK_SDMA,
-		.sblk = &sc7280_vig_sblk_0,
+		.sblk = &dpu_vig_sblk_qseed3_3_0_rot_v2,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
@@ -66,7 +65,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x1f8,
 		.features = DMA_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -74,7 +73,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -82,7 +81,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x1f8,
 		.features = DMA_CURSOR_SDM845_MASK_SDMA,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -170,8 +169,8 @@ static const struct dpu_wb_cfg sc7280_wb[] = {
 		.name = "wb_2", .id = WB_2,
 		.base = 0x65000, .len = 0x2c8,
 		.features = WB_SM8250_MASK,
-		.format_list = wb2_formats,
-		.num_formats = ARRAY_SIZE(wb2_formats),
+		.format_list = wb2_formats_rgb_yuv,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb_yuv),
 		.clk_ctrl = DPU_CLK_CTRL_WB2,
 		.xin_id = 6,
 		.vbif_idx = VBIF_RT,
@@ -249,6 +248,7 @@ const struct dpu_mdss_cfg dpu_sc7280_cfg = {
 	.mdss_ver = &sc7280_mdss_ver,
 	.caps = &sc7280_dpu_caps,
 	.mdp = &sc7280_mdp,
+	.cdm = &sc7280_cdm,
 	.ctl_count = ARRAY_SIZE(sc7280_ctl),
 	.ctl = sc7280_ctl,
 	.sspp_count = ARRAY_SIZE(sc7280_sspp),
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
index 4c0528794e7a..0d143e390eca 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sc8280xp_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 11,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
@@ -75,32 +74,32 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x2ac,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_0,
+		.features = VIG_SDM845_MASK,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
 	}, {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x2ac,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_1,
+		.features = VIG_SDM845_MASK,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
 	}, {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x2ac,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_2,
+		.features = VIG_SDM845_MASK,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
 	}, {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x2ac,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_3,
+		.features = VIG_SDM845_MASK,
+		.sblk = &dpu_vig_sblk_qseed3_3_0,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
@@ -108,7 +107,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x2ac,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
@@ -116,7 +115,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x2ac,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
@@ -124,7 +123,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x2ac,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
@@ -132,7 +131,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x2ac,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
index 7742f52be859..a1779c5597ae 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sm8450_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
@@ -32,6 +31,7 @@ static const struct dpu_mdp_cfg sm8450_mdp = {
 		[DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 },
 		[DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 },
+		[DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 },
 		[DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 },
 	},
 };
@@ -75,64 +75,64 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x32c,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_0,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_1,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG0,
 	}, {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x32c,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_1,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_1,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG1,
 	}, {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x32c,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_2,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_1,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG2,
 	}, {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x32c,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8250_vig_sblk_3,
+		.features = VIG_SDM845_MASK_SDMA,
+		.sblk = &dpu_vig_sblk_qseed3_3_1,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 		.clk_ctrl = DPU_CLK_CTRL_VIG3,
 	}, {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x32c,
-		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA0,
 	}, {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x32c,
-		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.features = DMA_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA1,
 	}, {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x32c,
-		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA2,
 	}, {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x32c,
-		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_3,
+		.features = DMA_CURSOR_SDM845_MASK_SDMA,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 		.clk_ctrl = DPU_CLK_CTRL_DMA3,
@@ -316,6 +316,21 @@ static const struct dpu_dsc_cfg sm8450_dsc[] = {
 	},
 };
 
+static const struct dpu_wb_cfg sm8450_wb[] = {
+	{
+		.name = "wb_2", .id = WB_2,
+		.base = 0x65000, .len = 0x2c8,
+		.features = WB_SM8250_MASK,
+		.format_list = wb2_formats_rgb,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb),
+		.clk_ctrl = DPU_CLK_CTRL_WB2,
+		.xin_id = 6,
+		.vbif_idx = VBIF_RT,
+		.maxlinewidth = 4096,
+		.intr_wb_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 4),
+	},
+};
+
 static const struct dpu_intf_cfg sm8450_intf[] = {
 	{
 		.name = "intf_0", .id = INTF_0,
@@ -411,6 +426,8 @@ const struct dpu_mdss_cfg dpu_sm8450_cfg = {
 	.dsc = sm8450_dsc,
 	.merge_3d_count = ARRAY_SIZE(sm8450_merge_3d),
 	.merge_3d = sm8450_merge_3d,
+	.wb_count = ARRAY_SIZE(sm8450_wb),
+	.wb = sm8450_wb,
 	.intf_count = ARRAY_SIZE(sm8450_intf),
 	.intf = sm8450_intf,
 	.vbif_count = ARRAY_SIZE(sdm845_vbif),
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
index 69b80af6566a..ad48defa154f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
@@ -10,7 +10,6 @@
 static const struct dpu_caps sm8550_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
-	.qseed_type = DPU_SSPP_SCALER_QSEED4,
 	.has_src_split = true,
 	.has_dim_layer = true,
 	.has_idle_pc = true,
@@ -67,71 +66,71 @@ static const struct dpu_sspp_cfg sm8550_sspp[] = {
 	{
 		.name = "sspp_0", .id = SSPP_VIG0,
 		.base = 0x4000, .len = 0x344,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8550_vig_sblk_0,
+		.features = VIG_SDM845_MASK,
+		.sblk = &dpu_vig_sblk_qseed3_3_2,
 		.xin_id = 0,
 		.type = SSPP_TYPE_VIG,
 	}, {
 		.name = "sspp_1", .id = SSPP_VIG1,
 		.base = 0x6000, .len = 0x344,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8550_vig_sblk_1,
+		.features = VIG_SDM845_MASK,
+		.sblk = &dpu_vig_sblk_qseed3_3_2,
 		.xin_id = 4,
 		.type = SSPP_TYPE_VIG,
 	}, {
 		.name = "sspp_2", .id = SSPP_VIG2,
 		.base = 0x8000, .len = 0x344,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8550_vig_sblk_2,
+		.features = VIG_SDM845_MASK,
+		.sblk = &dpu_vig_sblk_qseed3_3_2,
 		.xin_id = 8,
 		.type = SSPP_TYPE_VIG,
 	}, {
 		.name = "sspp_3", .id = SSPP_VIG3,
 		.base = 0xa000, .len = 0x344,
-		.features = VIG_SC7180_MASK,
-		.sblk = &sm8550_vig_sblk_3,
+		.features = VIG_SDM845_MASK,
+		.sblk = &dpu_vig_sblk_qseed3_3_2,
 		.xin_id = 12,
 		.type = SSPP_TYPE_VIG,
 	}, {
 		.name = "sspp_8", .id = SSPP_DMA0,
 		.base = 0x24000, .len = 0x344,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_0,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 1,
 		.type = SSPP_TYPE_DMA,
 	}, {
 		.name = "sspp_9", .id = SSPP_DMA1,
 		.base = 0x26000, .len = 0x344,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_1,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 5,
 		.type = SSPP_TYPE_DMA,
 	}, {
 		.name = "sspp_10", .id = SSPP_DMA2,
 		.base = 0x28000, .len = 0x344,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_2,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 9,
 		.type = SSPP_TYPE_DMA,
 	}, {
 		.name = "sspp_11", .id = SSPP_DMA3,
 		.base = 0x2a000, .len = 0x344,
 		.features = DMA_SDM845_MASK,
-		.sblk = &sdm845_dma_sblk_3,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 13,
 		.type = SSPP_TYPE_DMA,
 	}, {
 		.name = "sspp_12", .id = SSPP_DMA4,
 		.base = 0x2c000, .len = 0x344,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sm8550_dma_sblk_4,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 14,
 		.type = SSPP_TYPE_DMA,
 	}, {
 		.name = "sspp_13", .id = SSPP_DMA5,
 		.base = 0x2e000, .len = 0x344,
 		.features = DMA_CURSOR_SDM845_MASK,
-		.sblk = &sm8550_dma_sblk_5,
+		.sblk = &dpu_dma_sblk,
 		.xin_id = 15,
 		.type = SSPP_TYPE_DMA,
 	},
@@ -316,8 +315,8 @@ static const struct dpu_wb_cfg sm8550_wb[] = {
 		.name = "wb_2", .id = WB_2,
 		.base = 0x65000, .len = 0x2c8,
 		.features = WB_SM8250_MASK,
-		.format_list = wb2_formats,
-		.num_formats = ARRAY_SIZE(wb2_formats),
+		.format_list = wb2_formats_rgb,
+		.num_formats = ARRAY_SIZE(wb2_formats_rgb),
 		.xin_id = 6,
 		.vbif_idx = VBIF_RT,
 		.maxlinewidth = 4096,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index 3c475f8042b0..88c2e51ab166 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2014-2021 The Linux Foundation. All rights reserved.
  * Copyright (C) 2013 Red Hat
  * Author: Rob Clark <robdclark@gmail.com>
@@ -51,17 +51,6 @@ static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc)
 	return to_dpu_kms(priv->kms);
 }
 
-static void dpu_crtc_destroy(struct drm_crtc *crtc)
-{
-	struct dpu_crtc *dpu_crtc = to_dpu_crtc(crtc);
-
-	if (!crtc)
-		return;
-
-	drm_crtc_cleanup(crtc);
-	kfree(dpu_crtc);
-}
-
 static struct drm_encoder *get_encoder_from_crtc(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -125,7 +114,7 @@ static void dpu_crtc_setup_lm_misr(struct dpu_crtc_state *crtc_state)
 			continue;
 
 		/* Calculate MISR over 1 frame */
-		m->hw_lm->ops.setup_misr(m->hw_lm, true, 1);
+		m->hw_lm->ops.setup_misr(m->hw_lm);
 	}
 }
 
@@ -1435,7 +1424,6 @@ static int dpu_crtc_late_register(struct drm_crtc *crtc)
 
 static const struct drm_crtc_funcs dpu_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
-	.destroy = dpu_crtc_destroy,
 	.page_flip = drm_atomic_helper_page_flip,
 	.reset = dpu_crtc_reset,
 	.atomic_duplicate_state = dpu_crtc_duplicate_state,
@@ -1469,9 +1457,13 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane,
 	struct dpu_crtc *dpu_crtc;
 	int i, ret;
 
-	dpu_crtc = kzalloc(sizeof(*dpu_crtc), GFP_KERNEL);
-	if (!dpu_crtc)
-		return ERR_PTR(-ENOMEM);
+	dpu_crtc = drmm_crtc_alloc_with_planes(dev, struct dpu_crtc, base,
+					       plane, cursor,
+					       &dpu_crtc_funcs,
+					       NULL);
+
+	if (IS_ERR(dpu_crtc))
+		return ERR_CAST(dpu_crtc);
 
 	crtc = &dpu_crtc->base;
 	crtc->dev = dev;
@@ -1491,9 +1483,6 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane,
 				dpu_crtc_frame_event_work);
 	}
 
-	drm_crtc_init_with_planes(dev, crtc, plane, cursor, &dpu_crtc_funcs,
-				NULL);
-
 	drm_crtc_helper_add(crtc, &dpu_crtc_helper_funcs);
 
 	if (dpu_kms->catalog->dspp_count)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 1cf7ff6caff4..83380bc92a00 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -2,7 +2,7 @@
 /*
  * Copyright (C) 2013 Red Hat
  * Copyright (c) 2014-2018, 2020-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  *
  * Author: Rob Clark <robdclark@gmail.com>
  */
@@ -16,6 +16,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_file.h>
 #include <drm/drm_probe_helper.h>
+#include <drm/drm_framebuffer.h>
 
 #include "msm_drv.h"
 #include "dpu_kms.h"
@@ -26,6 +27,7 @@
 #include "dpu_hw_dspp.h"
 #include "dpu_hw_dsc.h"
 #include "dpu_hw_merge3d.h"
+#include "dpu_hw_cdm.h"
 #include "dpu_formats.h"
 #include "dpu_encoder_phys.h"
 #include "dpu_crtc.h"
@@ -39,6 +41,9 @@
 #define DPU_ERROR_ENC(e, fmt, ...) DPU_ERROR("enc%d " fmt,\
 		(e) ? (e)->base.base.id : -1, ##__VA_ARGS__)
 
+#define DPU_ERROR_ENC_RATELIMITED(e, fmt, ...) DPU_ERROR_RATELIMITED("enc%d " fmt,\
+		(e) ? (e)->base.base.id : -1, ##__VA_ARGS__)
+
 /*
  * Two to anticipate panels that can do cmd/vid dynamic switching
  * plan is to create all possible physical encoder types, and switch between
@@ -151,6 +156,8 @@ enum dpu_enc_rc_states {
  * @crtc_frame_event_cb:	callback handler for frame event
  * @crtc_frame_event_cb_data:	callback handler private data
  * @frame_done_timeout_ms:	frame done timeout in ms
+ * @frame_done_timeout_cnt:	atomic counter tracking the number of frame
+ * 				done timeouts
  * @frame_done_timer:		watchdog timer for frame done event
  * @disp_info:			local copy of msm_display_info struct
  * @idle_pc_supported:		indicate if idle power collaps is supported
@@ -184,13 +191,13 @@ struct dpu_encoder_virt {
 	struct drm_crtc *crtc;
 	struct drm_connector *connector;
 
-	struct dentry *debugfs_root;
 	struct mutex enc_lock;
 	DECLARE_BITMAP(frame_busy_mask, MAX_PHYS_ENCODERS_PER_VIRTUAL);
 	void (*crtc_frame_event_cb)(void *, u32 event);
 	void *crtc_frame_event_cb_data;
 
 	atomic_t frame_done_timeout_ms;
+	atomic_t frame_done_timeout_cnt;
 	struct timer_list frame_done_timer;
 
 	struct msm_display_info disp_info;
@@ -255,7 +262,7 @@ void dpu_encoder_setup_misr(const struct drm_encoder *drm_enc)
 		if (!phys->hw_intf || !phys->hw_intf->ops.setup_misr)
 			continue;
 
-		phys->hw_intf->ops.setup_misr(phys->hw_intf, true, 1);
+		phys->hw_intf->ops.setup_misr(phys->hw_intf);
 	}
 }
 
@@ -439,41 +446,6 @@ int dpu_encoder_get_linecount(struct drm_encoder *drm_enc)
 	return linecount;
 }
 
-static void dpu_encoder_destroy(struct drm_encoder *drm_enc)
-{
-	struct dpu_encoder_virt *dpu_enc = NULL;
-	int i = 0;
-
-	if (!drm_enc) {
-		DPU_ERROR("invalid encoder\n");
-		return;
-	}
-
-	dpu_enc = to_dpu_encoder_virt(drm_enc);
-	DPU_DEBUG_ENC(dpu_enc, "\n");
-
-	mutex_lock(&dpu_enc->enc_lock);
-
-	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
-		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
-
-		if (phys->ops.destroy) {
-			phys->ops.destroy(phys);
-			--dpu_enc->num_phys_encs;
-			dpu_enc->phys_encs[i] = NULL;
-		}
-	}
-
-	if (dpu_enc->num_phys_encs)
-		DPU_ERROR_ENC(dpu_enc, "expected 0 num_phys_encs not %d\n",
-				dpu_enc->num_phys_encs);
-	dpu_enc->num_phys_encs = 0;
-	mutex_unlock(&dpu_enc->enc_lock);
-
-	drm_encoder_cleanup(drm_enc);
-	mutex_destroy(&dpu_enc->enc_lock);
-}
-
 void dpu_encoder_helper_split_config(
 		struct dpu_encoder_phys *phys_enc,
 		enum dpu_intf interface)
@@ -614,6 +586,7 @@ static int dpu_encoder_virt_atomic_check(
 	struct drm_display_mode *adj_mode;
 	struct msm_display_topology topology;
 	struct dpu_global_state *global_state;
+	struct drm_framebuffer *fb;
 	struct drm_dsc_config *dsc;
 	int i = 0;
 	int ret = 0;
@@ -655,6 +628,22 @@ static int dpu_encoder_virt_atomic_check(
 	topology = dpu_encoder_get_topology(dpu_enc, dpu_kms, adj_mode, crtc_state, dsc);
 
 	/*
+	 * Use CDM only for writeback at the moment as other interfaces cannot handle it.
+	 * if writeback itself cannot handle cdm for some reason it will fail in its atomic_check()
+	 * earlier.
+	 */
+	if (dpu_enc->disp_info.intf_type == INTF_WB && conn_state->writeback_job) {
+		fb = conn_state->writeback_job->fb;
+
+		if (fb && DPU_FORMAT_IS_YUV(to_dpu_format(msm_framebuffer_format(fb))))
+			topology.needs_cdm = true;
+		if (topology.needs_cdm && !dpu_enc->cur_master->hw_cdm)
+			crtc_state->mode_changed = true;
+		else if (!topology.needs_cdm && dpu_enc->cur_master->hw_cdm)
+			crtc_state->mode_changed = true;
+	}
+
+	/*
 	 * Release and Allocate resources on every modeset
 	 * Dont allocate when active is false.
 	 */
@@ -1094,6 +1083,15 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc,
 
 	dpu_enc->dsc_mask = dsc_mask;
 
+	if (dpu_enc->disp_info.intf_type == INTF_WB && conn_state->writeback_job) {
+		struct dpu_hw_blk *hw_cdm = NULL;
+
+		dpu_rm_get_assigned_resources(&dpu_kms->rm, global_state,
+					      drm_enc->base.id, DPU_HW_BLK_CDM,
+					      &hw_cdm, 1);
+		dpu_enc->cur_master->hw_cdm = hw_cdm ? to_dpu_hw_cdm(hw_cdm) : NULL;
+	}
+
 	cstate = to_dpu_crtc_state(crtc_state);
 
 	for (i = 0; i < num_lm; i++) {
@@ -1204,6 +1202,8 @@ static void dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc,
 
 	dpu_enc->dsc = dpu_encoder_get_dsc_config(drm_enc);
 
+	atomic_set(&dpu_enc->frame_done_timeout_cnt, 0);
+
 	if (disp_info->intf_type == INTF_DP)
 		dpu_enc->wide_bus_en = msm_dp_wide_bus_available(priv->dp[index]);
 	else if (disp_info->intf_type == INTF_DSI)
@@ -2080,6 +2080,15 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc)
 					phys_enc->hw_pp->merge_3d->idx);
 	}
 
+	if (phys_enc->hw_cdm) {
+		if (phys_enc->hw_cdm->ops.bind_pingpong_blk && phys_enc->hw_pp)
+			phys_enc->hw_cdm->ops.bind_pingpong_blk(phys_enc->hw_cdm,
+								PINGPONG_NONE);
+		if (phys_enc->hw_ctl->ops.update_pending_flush_cdm)
+			phys_enc->hw_ctl->ops.update_pending_flush_cdm(phys_enc->hw_ctl,
+								       phys_enc->hw_cdm->idx);
+	}
+
 	if (dpu_enc->dsc) {
 		dpu_encoder_unprep_dsc(dpu_enc);
 		dpu_enc->dsc = NULL;
@@ -2108,18 +2117,20 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc)
 #ifdef CONFIG_DEBUG_FS
 static int _dpu_encoder_status_show(struct seq_file *s, void *data)
 {
-	struct dpu_encoder_virt *dpu_enc = s->private;
+	struct drm_encoder *drm_enc = s->private;
+	struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc);
 	int i;
 
 	mutex_lock(&dpu_enc->enc_lock);
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
 		struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
 
-		seq_printf(s, "intf:%d  wb:%d  vsync:%8d     underrun:%8d    ",
+		seq_printf(s, "intf:%d  wb:%d  vsync:%8d     underrun:%8d    frame_done_cnt:%d",
 				phys->hw_intf ? phys->hw_intf->idx - INTF_0 : -1,
 				phys->hw_wb ? phys->hw_wb->idx - WB_0 : -1,
 				atomic_read(&phys->vsync_cnt),
-				atomic_read(&phys->underrun_cnt));
+				atomic_read(&phys->underrun_cnt),
+				atomic_read(&dpu_enc->frame_done_timeout_cnt));
 
 		seq_printf(s, "mode: %s\n", dpu_encoder_helper_get_intf_type(phys->intf_mode));
 	}
@@ -2130,49 +2141,18 @@ static int _dpu_encoder_status_show(struct seq_file *s, void *data)
 
 DEFINE_SHOW_ATTRIBUTE(_dpu_encoder_status);
 
-static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc)
+static void dpu_encoder_debugfs_init(struct drm_encoder *drm_enc, struct dentry *root)
 {
-	struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc);
-
-	char name[12];
-
-	if (!drm_enc->dev) {
-		DPU_ERROR("invalid encoder or kms\n");
-		return -EINVAL;
-	}
-
-	snprintf(name, sizeof(name), "encoder%u", drm_enc->base.id);
-
-	/* create overall sub-directory for the encoder */
-	dpu_enc->debugfs_root = debugfs_create_dir(name,
-			drm_enc->dev->primary->debugfs_root);
-
 	/* don't error check these */
 	debugfs_create_file("status", 0600,
-		dpu_enc->debugfs_root, dpu_enc, &_dpu_encoder_status_fops);
-
-	return 0;
+			    root, drm_enc, &_dpu_encoder_status_fops);
 }
 #else
-static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc)
-{
-	return 0;
-}
+#define dpu_encoder_debugfs_init NULL
 #endif
 
-static int dpu_encoder_late_register(struct drm_encoder *encoder)
-{
-	return _dpu_encoder_init_debugfs(encoder);
-}
-
-static void dpu_encoder_early_unregister(struct drm_encoder *encoder)
-{
-	struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(encoder);
-
-	debugfs_remove_recursive(dpu_enc->debugfs_root);
-}
-
 static int dpu_encoder_virt_add_phys_encs(
+		struct drm_device *dev,
 		struct msm_display_info *disp_info,
 		struct dpu_encoder_virt *dpu_enc,
 		struct dpu_enc_phys_init_params *params)
@@ -2194,7 +2174,7 @@ static int dpu_encoder_virt_add_phys_encs(
 
 
 	if (disp_info->intf_type == INTF_WB) {
-		enc = dpu_encoder_phys_wb_init(params);
+		enc = dpu_encoder_phys_wb_init(dev, params);
 
 		if (IS_ERR(enc)) {
 			DPU_ERROR_ENC(dpu_enc, "failed to init wb enc: %ld\n",
@@ -2205,7 +2185,7 @@ static int dpu_encoder_virt_add_phys_encs(
 		dpu_enc->phys_encs[dpu_enc->num_phys_encs] = enc;
 		++dpu_enc->num_phys_encs;
 	} else if (disp_info->is_cmd_mode) {
-		enc = dpu_encoder_phys_cmd_init(params);
+		enc = dpu_encoder_phys_cmd_init(dev, params);
 
 		if (IS_ERR(enc)) {
 			DPU_ERROR_ENC(dpu_enc, "failed to init cmd enc: %ld\n",
@@ -2216,7 +2196,7 @@ static int dpu_encoder_virt_add_phys_encs(
 		dpu_enc->phys_encs[dpu_enc->num_phys_encs] = enc;
 		++dpu_enc->num_phys_encs;
 	} else {
-		enc = dpu_encoder_phys_vid_init(params);
+		enc = dpu_encoder_phys_vid_init(dev, params);
 
 		if (IS_ERR(enc)) {
 			DPU_ERROR_ENC(dpu_enc, "failed to init vid enc: %ld\n",
@@ -2305,7 +2285,7 @@ static int dpu_encoder_setup_display(struct dpu_encoder_virt *dpu_enc,
 			break;
 		}
 
-		ret = dpu_encoder_virt_add_phys_encs(disp_info,
+		ret = dpu_encoder_virt_add_phys_encs(dpu_kms->dev, disp_info,
 				dpu_enc, &phys_params);
 		if (ret) {
 			DPU_ERROR_ENC(dpu_enc, "failed to add phys encs\n");
@@ -2339,7 +2319,10 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t)
 		return;
 	}
 
-	DPU_ERROR_ENC(dpu_enc, "frame done timeout\n");
+	DPU_ERROR_ENC_RATELIMITED(dpu_enc, "frame done timeout\n");
+
+	if (atomic_inc_return(&dpu_enc->frame_done_timeout_cnt) == 1)
+		msm_disp_snapshot_state(drm_enc->dev);
 
 	event = DPU_ENCODER_FRAME_EVENT_ERROR;
 	trace_dpu_enc_frame_done_timeout(DRMID(drm_enc), event);
@@ -2354,9 +2337,7 @@ static const struct drm_encoder_helper_funcs dpu_encoder_helper_funcs = {
 };
 
 static const struct drm_encoder_funcs dpu_encoder_funcs = {
-		.destroy = dpu_encoder_destroy,
-		.late_register = dpu_encoder_late_register,
-		.early_unregister = dpu_encoder_early_unregister,
+	.debugfs_init = dpu_encoder_debugfs_init,
 };
 
 struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
@@ -2365,20 +2346,13 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
 {
 	struct msm_drm_private *priv = dev->dev_private;
 	struct dpu_kms *dpu_kms = to_dpu_kms(priv->kms);
-	struct drm_encoder *drm_enc = NULL;
-	struct dpu_encoder_virt *dpu_enc = NULL;
-	int ret = 0;
-
-	dpu_enc = devm_kzalloc(dev->dev, sizeof(*dpu_enc), GFP_KERNEL);
-	if (!dpu_enc)
-		return ERR_PTR(-ENOMEM);
+	struct dpu_encoder_virt *dpu_enc;
+	int ret;
 
-	ret = drm_encoder_init(dev, &dpu_enc->base, &dpu_encoder_funcs,
-			       drm_enc_mode, NULL);
-	if (ret) {
-		devm_kfree(dev->dev, dpu_enc);
-		return ERR_PTR(ret);
-	}
+	dpu_enc = drmm_encoder_alloc(dev, struct dpu_encoder_virt, base,
+				     &dpu_encoder_funcs, drm_enc_mode, NULL);
+	if (IS_ERR(dpu_enc))
+		return ERR_CAST(dpu_enc);
 
 	drm_encoder_helper_add(&dpu_enc->base, &dpu_encoder_helper_funcs);
 
@@ -2388,10 +2362,13 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
 	mutex_init(&dpu_enc->rc_lock);
 
 	ret = dpu_encoder_setup_display(dpu_enc, dpu_kms, disp_info);
-	if (ret)
-		goto fail;
+	if (ret) {
+		DPU_ERROR("failed to setup encoder\n");
+		return ERR_PTR(-ENOMEM);
+	}
 
 	atomic_set(&dpu_enc->frame_done_timeout_ms, 0);
+	atomic_set(&dpu_enc->frame_done_timeout_cnt, 0);
 	timer_setup(&dpu_enc->frame_done_timer,
 			dpu_encoder_frame_done_timeout, 0);
 
@@ -2404,13 +2381,6 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
 	DPU_DEBUG_ENC(dpu_enc, "created\n");
 
 	return &dpu_enc->base;
-
-fail:
-	DPU_ERROR("failed to create encoder\n");
-	if (drm_enc)
-		dpu_encoder_destroy(drm_enc);
-
-	return ERR_PTR(ret);
 }
 
 int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
@@ -2437,9 +2407,6 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc,
 		case MSM_ENC_TX_COMPLETE:
 			fn_wait = phys->ops.wait_for_tx_complete;
 			break;
-		case MSM_ENC_VBLANK:
-			fn_wait = phys->ops.wait_for_vblank;
-			break;
 		default:
 			DPU_ERROR_ENC(dpu_enc, "unknown wait event %d\n",
 					event);
@@ -2497,7 +2464,6 @@ void dpu_encoder_phys_init(struct dpu_encoder_phys *phys_enc,
 	phys_enc->enc_spinlock = p->enc_spinlock;
 	phys_enc->enable_state = DPU_ENC_DISABLED;
 
-	atomic_set(&phys_enc->vblank_refcount, 0);
 	atomic_set(&phys_enc->pending_kickoff_cnt, 0);
 	atomic_set(&phys_enc->pending_ctlstart_cnt, 0);
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
index 6f04c3d56e77..993f26343331 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
@@ -14,8 +14,10 @@
 #include "dpu_hw_intf.h"
 #include "dpu_hw_wb.h"
 #include "dpu_hw_pingpong.h"
+#include "dpu_hw_cdm.h"
 #include "dpu_hw_ctl.h"
 #include "dpu_hw_top.h"
+#include "dpu_hw_util.h"
 #include "dpu_encoder.h"
 #include "dpu_crtc.h"
 
@@ -72,7 +74,6 @@ struct dpu_encoder_phys;
  * @enable:			DRM Call. Enable a DRM mode.
  * @disable:			DRM Call. Disable mode.
  * @atomic_check:		DRM Call. Atomic check new DRM state.
- * @destroy:			DRM Call. Destroy and release resources.
  * @control_vblank_irq		Register/Deregister for VBLANK IRQ
  * @wait_for_commit_done:	Wait for hardware to have flushed the
  *				current pending frames to hardware
@@ -102,11 +103,9 @@ struct dpu_encoder_phys_ops {
 	int (*atomic_check)(struct dpu_encoder_phys *encoder,
 			    struct drm_crtc_state *crtc_state,
 			    struct drm_connector_state *conn_state);
-	void (*destroy)(struct dpu_encoder_phys *encoder);
 	int (*control_vblank_irq)(struct dpu_encoder_phys *enc, bool enable);
 	int (*wait_for_commit_done)(struct dpu_encoder_phys *phys_enc);
 	int (*wait_for_tx_complete)(struct dpu_encoder_phys *phys_enc);
-	int (*wait_for_vblank)(struct dpu_encoder_phys *phys_enc);
 	void (*prepare_for_kickoff)(struct dpu_encoder_phys *phys_enc);
 	void (*handle_post_kickoff)(struct dpu_encoder_phys *phys_enc);
 	void (*trigger_start)(struct dpu_encoder_phys *phys_enc);
@@ -153,8 +152,10 @@ enum dpu_intr_idx {
  * @hw_pp:		Hardware interface to the ping pong registers
  * @hw_intf:		Hardware interface to the intf registers
  * @hw_wb:		Hardware interface to the wb registers
+ * @hw_cdm:		Hardware interface to the CDM registers
  * @dpu_kms:		Pointer to the dpu_kms top level
  * @cached_mode:	DRM mode cached at mode_set time, acted on in enable
+ * @vblank_ctl_lock:	Vblank ctl mutex lock to protect vblank_refcount
  * @enabled:		Whether the encoder has enabled and running a mode
  * @split_role:		Role to play in a split-panel configuration
  * @intf_mode:		Interface mode
@@ -181,13 +182,15 @@ struct dpu_encoder_phys {
 	struct dpu_hw_pingpong *hw_pp;
 	struct dpu_hw_intf *hw_intf;
 	struct dpu_hw_wb *hw_wb;
+	struct dpu_hw_cdm *hw_cdm;
 	struct dpu_kms *dpu_kms;
 	struct drm_display_mode cached_mode;
+	struct mutex vblank_ctl_lock;
 	enum dpu_enc_split_role split_role;
 	enum dpu_intf_mode intf_mode;
 	spinlock_t *enc_spinlock;
 	enum dpu_enc_enable_state enable_state;
-	atomic_t vblank_refcount;
+	int vblank_refcount;
 	atomic_t vsync_cnt;
 	atomic_t underrun_cnt;
 	atomic_t pending_ctlstart_cnt;
@@ -210,6 +213,7 @@ static inline int dpu_encoder_phys_inc_pending(struct dpu_encoder_phys *phys)
  * @wbirq_refcount:     Reference count of writeback interrupt
  * @wb_done_timeout_cnt: number of wb done irq timeout errors
  * @wb_cfg:  writeback block config to store fb related details
+ * @cdm_cfg: cdm block config needed to store writeback block's CDM configuration
  * @wb_conn: backpointer to writeback connector
  * @wb_job: backpointer to current writeback job
  * @dest:   dpu buffer layout for current writeback output buffer
@@ -219,6 +223,7 @@ struct dpu_encoder_phys_wb {
 	atomic_t wbirq_refcount;
 	int wb_done_timeout_cnt;
 	struct dpu_hw_wb_cfg wb_cfg;
+	struct dpu_hw_cdm_cfg cdm_cfg;
 	struct drm_writeback_connector *wb_conn;
 	struct drm_writeback_job *wb_job;
 	struct dpu_hw_fmt_layout dest;
@@ -281,22 +286,24 @@ struct dpu_encoder_wait_info {
  * @p:	Pointer to init params structure
  * Return: Error code or newly allocated encoder
  */
-struct dpu_encoder_phys *dpu_encoder_phys_vid_init(
+struct dpu_encoder_phys *dpu_encoder_phys_vid_init(struct drm_device *dev,
 		struct dpu_enc_phys_init_params *p);
 
 /**
  * dpu_encoder_phys_cmd_init - Construct a new command mode physical encoder
+ * @dev:  Corresponding device for devres management
  * @p:	Pointer to init params structure
  * Return: Error code or newly allocated encoder
  */
-struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(
+struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(struct drm_device *dev,
 		struct dpu_enc_phys_init_params *p);
 
 /**
  * dpu_encoder_phys_wb_init - initialize writeback encoder
+ * @dev:  Corresponding device for devres management
  * @init:	Pointer to init info structure with initialization params
  */
-struct dpu_encoder_phys *dpu_encoder_phys_wb_init(
+struct dpu_encoder_phys *dpu_encoder_phys_wb_init(struct drm_device *dev,
 		struct dpu_enc_phys_init_params *p);
 
 /**
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
index be185fe69793..a301e2833177 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
@@ -13,6 +13,8 @@
 #include "dpu_trace.h"
 #include "disp/msm_disp_snapshot.h"
 
+#include <drm/drm_managed.h>
+
 #define DPU_DEBUG_CMDENC(e, fmt, ...) DPU_DEBUG("enc%d intf%d " fmt, \
 		(e) && (e)->base.parent ? \
 		(e)->base.parent->base.id : -1, \
@@ -244,7 +246,8 @@ static int dpu_encoder_phys_cmd_control_vblank_irq(
 		return -EINVAL;
 	}
 
-	refcount = atomic_read(&phys_enc->vblank_refcount);
+	mutex_lock(&phys_enc->vblank_ctl_lock);
+	refcount = phys_enc->vblank_refcount;
 
 	/* Slave encoders don't report vblank */
 	if (!dpu_encoder_phys_cmd_is_master(phys_enc))
@@ -260,16 +263,24 @@ static int dpu_encoder_phys_cmd_control_vblank_irq(
 		      phys_enc->hw_pp->idx - PINGPONG_0,
 		      enable ? "true" : "false", refcount);
 
-	if (enable && atomic_inc_return(&phys_enc->vblank_refcount) == 1)
-		ret = dpu_core_irq_register_callback(phys_enc->dpu_kms,
-				phys_enc->irq[INTR_IDX_RDPTR],
-				dpu_encoder_phys_cmd_te_rd_ptr_irq,
-				phys_enc);
-	else if (!enable && atomic_dec_return(&phys_enc->vblank_refcount) == 0)
-		ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms,
-				phys_enc->irq[INTR_IDX_RDPTR]);
+	if (enable) {
+		if (phys_enc->vblank_refcount == 0)
+			ret = dpu_core_irq_register_callback(phys_enc->dpu_kms,
+					phys_enc->irq[INTR_IDX_RDPTR],
+					dpu_encoder_phys_cmd_te_rd_ptr_irq,
+					phys_enc);
+		if (!ret)
+			phys_enc->vblank_refcount++;
+	} else if (!enable) {
+		if (phys_enc->vblank_refcount == 1)
+			ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms,
+					phys_enc->irq[INTR_IDX_RDPTR]);
+		if (!ret)
+			phys_enc->vblank_refcount--;
+	}
 
 end:
+	mutex_unlock(&phys_enc->vblank_ctl_lock);
 	if (ret) {
 		DRM_ERROR("vblank irq err id:%u pp:%d ret:%d, enable %s/%d\n",
 			  DRMID(phys_enc->parent),
@@ -285,7 +296,7 @@ static void dpu_encoder_phys_cmd_irq_control(struct dpu_encoder_phys *phys_enc,
 {
 	trace_dpu_enc_phys_cmd_irq_ctrl(DRMID(phys_enc->parent),
 			phys_enc->hw_pp->idx - PINGPONG_0,
-			enable, atomic_read(&phys_enc->vblank_refcount));
+			enable, phys_enc->vblank_refcount);
 
 	if (enable) {
 		dpu_core_irq_register_callback(phys_enc->dpu_kms,
@@ -558,14 +569,6 @@ static void dpu_encoder_phys_cmd_disable(struct dpu_encoder_phys *phys_enc)
 	phys_enc->enable_state = DPU_ENC_DISABLED;
 }
 
-static void dpu_encoder_phys_cmd_destroy(struct dpu_encoder_phys *phys_enc)
-{
-	struct dpu_encoder_phys_cmd *cmd_enc =
-		to_dpu_encoder_phys_cmd(phys_enc);
-
-	kfree(cmd_enc);
-}
-
 static void dpu_encoder_phys_cmd_prepare_for_kickoff(
 		struct dpu_encoder_phys *phys_enc)
 {
@@ -681,33 +684,6 @@ static int dpu_encoder_phys_cmd_wait_for_commit_done(
 	return _dpu_encoder_phys_cmd_wait_for_ctl_start(phys_enc);
 }
 
-static int dpu_encoder_phys_cmd_wait_for_vblank(
-		struct dpu_encoder_phys *phys_enc)
-{
-	int rc = 0;
-	struct dpu_encoder_phys_cmd *cmd_enc;
-	struct dpu_encoder_wait_info wait_info;
-
-	cmd_enc = to_dpu_encoder_phys_cmd(phys_enc);
-
-	/* only required for master controller */
-	if (!dpu_encoder_phys_cmd_is_master(phys_enc))
-		return rc;
-
-	wait_info.wq = &cmd_enc->pending_vblank_wq;
-	wait_info.atomic_cnt = &cmd_enc->pending_vblank_cnt;
-	wait_info.timeout_ms = KICKOFF_TIMEOUT_MS;
-
-	atomic_inc(&cmd_enc->pending_vblank_cnt);
-
-	rc = dpu_encoder_helper_wait_for_irq(phys_enc,
-			phys_enc->irq[INTR_IDX_RDPTR],
-			dpu_encoder_phys_cmd_te_rd_ptr_irq,
-			&wait_info);
-
-	return rc;
-}
-
 static void dpu_encoder_phys_cmd_handle_post_kickoff(
 		struct dpu_encoder_phys *phys_enc)
 {
@@ -731,12 +707,10 @@ static void dpu_encoder_phys_cmd_init_ops(
 	ops->atomic_mode_set = dpu_encoder_phys_cmd_atomic_mode_set;
 	ops->enable = dpu_encoder_phys_cmd_enable;
 	ops->disable = dpu_encoder_phys_cmd_disable;
-	ops->destroy = dpu_encoder_phys_cmd_destroy;
 	ops->control_vblank_irq = dpu_encoder_phys_cmd_control_vblank_irq;
 	ops->wait_for_commit_done = dpu_encoder_phys_cmd_wait_for_commit_done;
 	ops->prepare_for_kickoff = dpu_encoder_phys_cmd_prepare_for_kickoff;
 	ops->wait_for_tx_complete = dpu_encoder_phys_cmd_wait_for_tx_complete;
-	ops->wait_for_vblank = dpu_encoder_phys_cmd_wait_for_vblank;
 	ops->trigger_start = dpu_encoder_phys_cmd_trigger_start;
 	ops->needs_single_flush = dpu_encoder_phys_cmd_needs_single_flush;
 	ops->irq_control = dpu_encoder_phys_cmd_irq_control;
@@ -746,7 +720,7 @@ static void dpu_encoder_phys_cmd_init_ops(
 	ops->get_line_count = dpu_encoder_phys_cmd_get_line_count;
 }
 
-struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(
+struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(struct drm_device *dev,
 		struct dpu_enc_phys_init_params *p)
 {
 	struct dpu_encoder_phys *phys_enc = NULL;
@@ -754,7 +728,7 @@ struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(
 
 	DPU_DEBUG("intf\n");
 
-	cmd_enc = kzalloc(sizeof(*cmd_enc), GFP_KERNEL);
+	cmd_enc = drmm_kzalloc(dev, sizeof(*cmd_enc), GFP_KERNEL);
 	if (!cmd_enc) {
 		DPU_ERROR("failed to allocate\n");
 		return ERR_PTR(-ENOMEM);
@@ -763,6 +737,9 @@ struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(
 
 	dpu_encoder_phys_init(phys_enc, p);
 
+	mutex_init(&phys_enc->vblank_ctl_lock);
+	phys_enc->vblank_refcount = 0;
+
 	dpu_encoder_phys_cmd_init_ops(&phys_enc->ops);
 	phys_enc->intf_mode = INTF_MODE_CMD;
 	cmd_enc->stream_sel = 0;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
index a01fda711883..d0f56c5c4cce 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
@@ -11,6 +11,8 @@
 #include "dpu_trace.h"
 #include "disp/msm_disp_snapshot.h"
 
+#include <drm/drm_managed.h>
+
 #define DPU_DEBUG_VIDENC(e, fmt, ...) DPU_DEBUG("enc%d intf%d " fmt, \
 		(e) && (e)->parent ? \
 		(e)->parent->base.id : -1, \
@@ -364,7 +366,8 @@ static int dpu_encoder_phys_vid_control_vblank_irq(
 	int ret = 0;
 	int refcount;
 
-	refcount = atomic_read(&phys_enc->vblank_refcount);
+	mutex_lock(&phys_enc->vblank_ctl_lock);
+	refcount = phys_enc->vblank_refcount;
 
 	/* Slave encoders don't report vblank */
 	if (!dpu_encoder_phys_vid_is_master(phys_enc))
@@ -377,18 +380,26 @@ static int dpu_encoder_phys_vid_control_vblank_irq(
 	}
 
 	DRM_DEBUG_VBL("id:%u enable=%d/%d\n", DRMID(phys_enc->parent), enable,
-		      atomic_read(&phys_enc->vblank_refcount));
+		      refcount);
 
-	if (enable && atomic_inc_return(&phys_enc->vblank_refcount) == 1)
-		ret = dpu_core_irq_register_callback(phys_enc->dpu_kms,
-				phys_enc->irq[INTR_IDX_VSYNC],
-				dpu_encoder_phys_vid_vblank_irq,
-				phys_enc);
-	else if (!enable && atomic_dec_return(&phys_enc->vblank_refcount) == 0)
-		ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms,
-				phys_enc->irq[INTR_IDX_VSYNC]);
+	if (enable) {
+		if (phys_enc->vblank_refcount == 0)
+			ret = dpu_core_irq_register_callback(phys_enc->dpu_kms,
+					phys_enc->irq[INTR_IDX_VSYNC],
+					dpu_encoder_phys_vid_vblank_irq,
+					phys_enc);
+		if (!ret)
+			phys_enc->vblank_refcount++;
+	} else if (!enable) {
+		if (phys_enc->vblank_refcount == 1)
+			ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms,
+					phys_enc->irq[INTR_IDX_VSYNC]);
+		if (!ret)
+			phys_enc->vblank_refcount--;
+	}
 
 end:
+	mutex_unlock(&phys_enc->vblank_ctl_lock);
 	if (ret) {
 		DRM_ERROR("failed: id:%u intf:%d ret:%d enable:%d refcnt:%d\n",
 			  DRMID(phys_enc->parent),
@@ -438,13 +449,7 @@ skip_flush:
 		phys_enc->enable_state = DPU_ENC_ENABLING;
 }
 
-static void dpu_encoder_phys_vid_destroy(struct dpu_encoder_phys *phys_enc)
-{
-	DPU_DEBUG_VIDENC(phys_enc, "\n");
-	kfree(phys_enc);
-}
-
-static int dpu_encoder_phys_vid_wait_for_vblank(
+static int dpu_encoder_phys_vid_wait_for_tx_complete(
 		struct dpu_encoder_phys *phys_enc)
 {
 	struct dpu_encoder_wait_info wait_info;
@@ -558,7 +563,7 @@ static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc)
 	 * scanout buffer) don't latch properly..
 	 */
 	if (dpu_encoder_phys_vid_is_master(phys_enc)) {
-		ret = dpu_encoder_phys_vid_wait_for_vblank(phys_enc);
+		ret = dpu_encoder_phys_vid_wait_for_tx_complete(phys_enc);
 		if (ret) {
 			atomic_set(&phys_enc->pending_kickoff_cnt, 0);
 			DRM_ERROR("wait disable failed: id:%u intf:%d ret:%d\n",
@@ -578,7 +583,7 @@ static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc)
 		spin_lock_irqsave(phys_enc->enc_spinlock, lock_flags);
 		dpu_encoder_phys_inc_pending(phys_enc);
 		spin_unlock_irqrestore(phys_enc->enc_spinlock, lock_flags);
-		ret = dpu_encoder_phys_vid_wait_for_vblank(phys_enc);
+		ret = dpu_encoder_phys_vid_wait_for_tx_complete(phys_enc);
 		if (ret) {
 			atomic_set(&phys_enc->pending_kickoff_cnt, 0);
 			DRM_ERROR("wait disable failed: id:%u intf:%d ret:%d\n",
@@ -618,7 +623,7 @@ static void dpu_encoder_phys_vid_irq_control(struct dpu_encoder_phys *phys_enc,
 	trace_dpu_enc_phys_vid_irq_ctrl(DRMID(phys_enc->parent),
 			    phys_enc->hw_intf->idx - INTF_0,
 			    enable,
-			    atomic_read(&phys_enc->vblank_refcount));
+			   phys_enc->vblank_refcount);
 
 	if (enable) {
 		ret = dpu_encoder_phys_vid_control_vblank_irq(phys_enc, true);
@@ -681,11 +686,9 @@ static void dpu_encoder_phys_vid_init_ops(struct dpu_encoder_phys_ops *ops)
 	ops->atomic_mode_set = dpu_encoder_phys_vid_atomic_mode_set;
 	ops->enable = dpu_encoder_phys_vid_enable;
 	ops->disable = dpu_encoder_phys_vid_disable;
-	ops->destroy = dpu_encoder_phys_vid_destroy;
 	ops->control_vblank_irq = dpu_encoder_phys_vid_control_vblank_irq;
 	ops->wait_for_commit_done = dpu_encoder_phys_vid_wait_for_commit_done;
-	ops->wait_for_vblank = dpu_encoder_phys_vid_wait_for_vblank;
-	ops->wait_for_tx_complete = dpu_encoder_phys_vid_wait_for_vblank;
+	ops->wait_for_tx_complete = dpu_encoder_phys_vid_wait_for_tx_complete;
 	ops->irq_control = dpu_encoder_phys_vid_irq_control;
 	ops->prepare_for_kickoff = dpu_encoder_phys_vid_prepare_for_kickoff;
 	ops->handle_post_kickoff = dpu_encoder_phys_vid_handle_post_kickoff;
@@ -694,7 +697,7 @@ static void dpu_encoder_phys_vid_init_ops(struct dpu_encoder_phys_ops *ops)
 	ops->get_frame_count = dpu_encoder_phys_vid_get_frame_count;
 }
 
-struct dpu_encoder_phys *dpu_encoder_phys_vid_init(
+struct dpu_encoder_phys *dpu_encoder_phys_vid_init(struct drm_device *dev,
 		struct dpu_enc_phys_init_params *p)
 {
 	struct dpu_encoder_phys *phys_enc = NULL;
@@ -704,7 +707,7 @@ struct dpu_encoder_phys *dpu_encoder_phys_vid_init(
 		return ERR_PTR(-EINVAL);
 	}
 
-	phys_enc = kzalloc(sizeof(*phys_enc), GFP_KERNEL);
+	phys_enc = drmm_kzalloc(dev, sizeof(*phys_enc), GFP_KERNEL);
 	if (!phys_enc) {
 		DPU_ERROR("failed to create encoder due to memory allocation error\n");
 		return ERR_PTR(-ENOMEM);
@@ -713,6 +716,8 @@ struct dpu_encoder_phys *dpu_encoder_phys_vid_init(
 	DPU_DEBUG_VIDENC(phys_enc, "\n");
 
 	dpu_encoder_phys_init(phys_enc, p);
+	mutex_init(&phys_enc->vblank_ctl_lock);
+	phys_enc->vblank_refcount = 0;
 
 	dpu_encoder_phys_vid_init_ops(&phys_enc->ops);
 	phys_enc->intf_mode = INTF_MODE_VIDEO;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
index 0b6a761d68b7..4cd2d9e3131a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
@@ -8,6 +8,7 @@
 #include <linux/debugfs.h>
 
 #include <drm/drm_framebuffer.h>
+#include <drm/drm_managed.h>
 
 #include "dpu_encoder_phys.h"
 #include "dpu_formats.h"
@@ -206,13 +207,14 @@ static void dpu_encoder_phys_wb_setup_fb(struct dpu_encoder_phys *phys_enc,
 }
 
 /**
- * dpu_encoder_phys_wb_setup_cdp - setup chroma down prefetch block
+ * dpu_encoder_phys_wb_setup_ctl - setup wb pipeline for ctl path
  * @phys_enc:Pointer to physical encoder
  */
-static void dpu_encoder_phys_wb_setup_cdp(struct dpu_encoder_phys *phys_enc)
+static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc)
 {
 	struct dpu_hw_wb *hw_wb;
 	struct dpu_hw_ctl *ctl;
+	struct dpu_hw_cdm *hw_cdm;
 
 	if (!phys_enc) {
 		DPU_ERROR("invalid encoder\n");
@@ -221,6 +223,7 @@ static void dpu_encoder_phys_wb_setup_cdp(struct dpu_encoder_phys *phys_enc)
 
 	hw_wb = phys_enc->hw_wb;
 	ctl = phys_enc->hw_ctl;
+	hw_cdm = phys_enc->hw_cdm;
 
 	if (test_bit(DPU_CTL_ACTIVE_CFG, &ctl->caps->features) &&
 		(phys_enc->hw_ctl &&
@@ -237,6 +240,9 @@ static void dpu_encoder_phys_wb_setup_cdp(struct dpu_encoder_phys *phys_enc)
 		if (mode_3d && hw_pp && hw_pp->merge_3d)
 			intf_cfg.merge_3d = hw_pp->merge_3d->idx;
 
+		if (hw_cdm)
+			intf_cfg.cdm = hw_cdm->idx;
+
 		if (phys_enc->hw_pp->merge_3d && phys_enc->hw_pp->merge_3d->ops.setup_3d_mode)
 			phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d,
 					mode_3d);
@@ -259,6 +265,96 @@ static void dpu_encoder_phys_wb_setup_cdp(struct dpu_encoder_phys *phys_enc)
 }
 
 /**
+ * dpu_encoder_helper_phys_setup_cdm - setup chroma down sampling block
+ *                                     This API does not handle DPU_CHROMA_H1V2.
+ * @phys_enc:Pointer to physical encoder
+ */
+static void dpu_encoder_helper_phys_setup_cdm(struct dpu_encoder_phys *phys_enc)
+{
+	struct dpu_hw_cdm *hw_cdm;
+	struct dpu_hw_cdm_cfg *cdm_cfg;
+	struct dpu_hw_pingpong *hw_pp;
+	struct dpu_encoder_phys_wb *wb_enc;
+	const struct msm_format *format;
+	const struct dpu_format *dpu_fmt;
+	struct drm_writeback_job *wb_job;
+	int ret;
+
+	if (!phys_enc)
+		return;
+
+	wb_enc = to_dpu_encoder_phys_wb(phys_enc);
+	cdm_cfg = &wb_enc->cdm_cfg;
+	hw_pp = phys_enc->hw_pp;
+	hw_cdm = phys_enc->hw_cdm;
+	wb_job = wb_enc->wb_job;
+
+	format = msm_framebuffer_format(wb_enc->wb_job->fb);
+	dpu_fmt = dpu_get_dpu_format_ext(format->pixel_format, wb_job->fb->modifier);
+
+	if (!hw_cdm)
+		return;
+
+	if (!DPU_FORMAT_IS_YUV(dpu_fmt)) {
+		DPU_DEBUG("[enc:%d] cdm_disable fmt:%x\n", DRMID(phys_enc->parent),
+			  dpu_fmt->base.pixel_format);
+		if (hw_cdm->ops.bind_pingpong_blk)
+			hw_cdm->ops.bind_pingpong_blk(hw_cdm, PINGPONG_NONE);
+
+		return;
+	}
+
+	memset(cdm_cfg, 0, sizeof(struct dpu_hw_cdm_cfg));
+
+	cdm_cfg->output_width = wb_job->fb->width;
+	cdm_cfg->output_height = wb_job->fb->height;
+	cdm_cfg->output_fmt = dpu_fmt;
+	cdm_cfg->output_type = CDM_CDWN_OUTPUT_WB;
+	cdm_cfg->output_bit_depth = DPU_FORMAT_IS_DX(dpu_fmt) ?
+			CDM_CDWN_OUTPUT_10BIT : CDM_CDWN_OUTPUT_8BIT;
+	cdm_cfg->csc_cfg = &dpu_csc10_rgb2yuv_601l;
+
+	/* enable 10 bit logic */
+	switch (cdm_cfg->output_fmt->chroma_sample) {
+	case DPU_CHROMA_RGB:
+		cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
+		cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
+		break;
+	case DPU_CHROMA_H2V1:
+		cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
+		cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
+		break;
+	case DPU_CHROMA_420:
+		cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
+		cdm_cfg->v_cdwn_type = CDM_CDWN_OFFSITE;
+		break;
+	case DPU_CHROMA_H1V2:
+	default:
+		DPU_ERROR("[enc:%d] unsupported chroma sampling type\n",
+			  DRMID(phys_enc->parent));
+		cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
+		cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
+		break;
+	}
+
+	DPU_DEBUG("[enc:%d] cdm_enable:%d,%d,%X,%d,%d,%d,%d]\n",
+		  DRMID(phys_enc->parent), cdm_cfg->output_width,
+		  cdm_cfg->output_height, cdm_cfg->output_fmt->base.pixel_format,
+		  cdm_cfg->output_type, cdm_cfg->output_bit_depth,
+		  cdm_cfg->h_cdwn_type, cdm_cfg->v_cdwn_type);
+
+	if (hw_cdm->ops.enable) {
+		cdm_cfg->pp_id = hw_pp->idx;
+		ret = hw_cdm->ops.enable(hw_cdm, cdm_cfg);
+		if (ret < 0) {
+			DPU_ERROR("[enc:%d] failed to enable CDM; ret:%d\n",
+				  DRMID(phys_enc->parent), ret);
+			return;
+		}
+	}
+}
+
+/**
  * dpu_encoder_phys_wb_atomic_check - verify and fixup given atomic states
  * @phys_enc:	Pointer to physical encoder
  * @crtc_state:	Pointer to CRTC atomic state
@@ -307,7 +403,7 @@ static int dpu_encoder_phys_wb_atomic_check(
 		return -EINVAL;
 	}
 
-	return 0;
+	return drm_atomic_helper_check_wb_connector_state(conn_state->connector, conn_state->state);
 }
 
 
@@ -320,6 +416,7 @@ static void _dpu_encoder_phys_wb_update_flush(struct dpu_encoder_phys *phys_enc)
 	struct dpu_hw_wb *hw_wb;
 	struct dpu_hw_ctl *hw_ctl;
 	struct dpu_hw_pingpong *hw_pp;
+	struct dpu_hw_cdm *hw_cdm;
 	u32 pending_flush = 0;
 
 	if (!phys_enc)
@@ -328,6 +425,7 @@ static void _dpu_encoder_phys_wb_update_flush(struct dpu_encoder_phys *phys_enc)
 	hw_wb = phys_enc->hw_wb;
 	hw_pp = phys_enc->hw_pp;
 	hw_ctl = phys_enc->hw_ctl;
+	hw_cdm = phys_enc->hw_cdm;
 
 	DPU_DEBUG("[wb:%d]\n", hw_wb->idx - WB_0);
 
@@ -343,6 +441,9 @@ static void _dpu_encoder_phys_wb_update_flush(struct dpu_encoder_phys *phys_enc)
 		hw_ctl->ops.update_pending_flush_merge_3d(hw_ctl,
 				hw_pp->merge_3d->idx);
 
+	if (hw_cdm && hw_ctl->ops.update_pending_flush_cdm)
+		hw_ctl->ops.update_pending_flush_cdm(hw_ctl, hw_cdm->idx);
+
 	if (hw_ctl->ops.get_pending_flush)
 		pending_flush = hw_ctl->ops.get_pending_flush(hw_ctl);
 
@@ -374,8 +475,9 @@ static void dpu_encoder_phys_wb_setup(
 
 	dpu_encoder_phys_wb_setup_fb(phys_enc, fb);
 
-	dpu_encoder_phys_wb_setup_cdp(phys_enc);
+	dpu_encoder_helper_phys_setup_cdm(phys_enc);
 
+	dpu_encoder_phys_wb_setup_ctl(phys_enc);
 }
 
 /**
@@ -580,20 +682,6 @@ static void dpu_encoder_phys_wb_disable(struct dpu_encoder_phys *phys_enc)
 	phys_enc->enable_state = DPU_ENC_DISABLED;
 }
 
-/**
- * dpu_encoder_phys_wb_destroy - destroy writeback encoder
- * @phys_enc:	Pointer to physical encoder
- */
-static void dpu_encoder_phys_wb_destroy(struct dpu_encoder_phys *phys_enc)
-{
-	if (!phys_enc)
-		return;
-
-	DPU_DEBUG("[wb:%d]\n", phys_enc->hw_wb->idx - WB_0);
-
-	kfree(phys_enc);
-}
-
 static void dpu_encoder_phys_wb_prepare_wb_job(struct dpu_encoder_phys *phys_enc,
 		struct drm_writeback_job *job)
 {
@@ -689,7 +777,6 @@ static void dpu_encoder_phys_wb_init_ops(struct dpu_encoder_phys_ops *ops)
 	ops->atomic_mode_set = dpu_encoder_phys_wb_atomic_mode_set;
 	ops->enable = dpu_encoder_phys_wb_enable;
 	ops->disable = dpu_encoder_phys_wb_disable;
-	ops->destroy = dpu_encoder_phys_wb_destroy;
 	ops->atomic_check = dpu_encoder_phys_wb_atomic_check;
 	ops->wait_for_commit_done = dpu_encoder_phys_wb_wait_for_commit_done;
 	ops->prepare_for_kickoff = dpu_encoder_phys_wb_prepare_for_kickoff;
@@ -705,9 +792,10 @@ static void dpu_encoder_phys_wb_init_ops(struct dpu_encoder_phys_ops *ops)
 
 /**
  * dpu_encoder_phys_wb_init - initialize writeback encoder
+ * @dev:  Corresponding device for devres management
  * @p:	Pointer to init info structure with initialization params
  */
-struct dpu_encoder_phys *dpu_encoder_phys_wb_init(
+struct dpu_encoder_phys *dpu_encoder_phys_wb_init(struct drm_device *dev,
 		struct dpu_enc_phys_init_params *p)
 {
 	struct dpu_encoder_phys *phys_enc = NULL;
@@ -720,7 +808,7 @@ struct dpu_encoder_phys *dpu_encoder_phys_wb_init(
 		return ERR_PTR(-EINVAL);
 	}
 
-	wb_enc = kzalloc(sizeof(*wb_enc), GFP_KERNEL);
+	wb_enc = drmm_kzalloc(dev, sizeof(*wb_enc), GFP_KERNEL);
 	if (!wb_enc) {
 		DPU_ERROR("failed to allocate wb phys_enc enc\n");
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index a1aada630780..54e8717403a0 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -22,23 +22,14 @@
 	BIT(DPU_SSPP_CSC_10BIT))
 
 #define VIG_MSM8998_MASK \
-	(VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED3))
+	(VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE))
 
 #define VIG_SDM845_MASK \
-	(VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3))
+	(VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE))
 
 #define VIG_SDM845_MASK_SDMA \
 	(VIG_SDM845_MASK | BIT(DPU_SSPP_SMART_DMA_V2))
 
-#define VIG_SC7180_MASK \
-	(VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED4))
-
-#define VIG_SM6125_MASK \
-	(VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3LITE))
-
-#define VIG_SC7180_MASK_SDMA \
-	(VIG_SC7180_MASK | BIT(DPU_SSPP_SMART_DMA_V2))
-
 #define VIG_QCM2290_MASK (VIG_BASE_MASK | BIT(DPU_SSPP_QOS_8LVL))
 
 #define DMA_MSM8998_MASK \
@@ -47,7 +38,7 @@
 	BIT(DPU_SSPP_CDP) | BIT(DPU_SSPP_EXCL_RECT))
 
 #define VIG_SC7280_MASK \
-	(VIG_SC7180_MASK | BIT(DPU_SSPP_INLINE_ROTATION))
+	(VIG_SDM845_MASK | BIT(DPU_SSPP_INLINE_ROTATION))
 
 #define VIG_SC7280_MASK_SDMA \
 	(VIG_SC7280_MASK | BIT(DPU_SSPP_SMART_DMA_V2))
@@ -211,7 +202,7 @@ static const u32 rotation_v2_formats[] = {
 	/* TODO add formats after validation */
 };
 
-static const uint32_t wb2_formats[] = {
+static const u32 wb2_formats_rgb[] = {
 	DRM_FORMAT_RGB565,
 	DRM_FORMAT_BGR565,
 	DRM_FORMAT_RGB888,
@@ -245,21 +236,56 @@ static const uint32_t wb2_formats[] = {
 	DRM_FORMAT_XBGR4444,
 };
 
+static const u32 wb2_formats_rgb_yuv[] = {
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_BGR565,
+	DRM_FORMAT_RGB888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_RGBA8888,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_RGBX8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_RGBA5551,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGBX5551,
+	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_RGBA4444,
+	DRM_FORMAT_RGBX4444,
+	DRM_FORMAT_XRGB4444,
+	DRM_FORMAT_BGR565,
+	DRM_FORMAT_BGR888,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_BGRA8888,
+	DRM_FORMAT_BGRX8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ABGR1555,
+	DRM_FORMAT_BGRA5551,
+	DRM_FORMAT_XBGR1555,
+	DRM_FORMAT_BGRX5551,
+	DRM_FORMAT_ABGR4444,
+	DRM_FORMAT_BGRA4444,
+	DRM_FORMAT_BGRX4444,
+	DRM_FORMAT_XBGR4444,
+	DRM_FORMAT_NV12,
+};
+
 /*************************************************************
  * SSPP sub blocks config
  *************************************************************/
 
+#define SSPP_SCALER_VER(maj, min) (((maj) << 16) | (min))
+
 /* SSPP common configuration */
-#define _VIG_SBLK(sdma_pri, qseed_ver) \
+#define _VIG_SBLK(scaler_ver) \
 	{ \
 	.maxdwnscale = MAX_DOWNSCALE_RATIO, \
 	.maxupscale = MAX_UPSCALE_RATIO, \
-	.smart_dma_priority = sdma_pri, \
 	.scaler_blk = {.name = "scaler", \
-		.id = qseed_ver, \
+		.version = scaler_ver, \
 		.base = 0xa00, .len = 0xa0,}, \
 	.csc_blk = {.name = "csc", \
-		.id = DPU_SSPP_CSC_10BIT, \
 		.base = 0x1a00, .len = 0x100,}, \
 	.format_list = plane_formats_yuv, \
 	.num_formats = ARRAY_SIZE(plane_formats_yuv), \
@@ -268,16 +294,14 @@ static const uint32_t wb2_formats[] = {
 	.rotation_cfg = NULL, \
 	}
 
-#define _VIG_SBLK_ROT(sdma_pri, qseed_ver, rot_cfg) \
+#define _VIG_SBLK_ROT(scaler_ver, rot_cfg) \
 	{ \
 	.maxdwnscale = MAX_DOWNSCALE_RATIO, \
 	.maxupscale = MAX_UPSCALE_RATIO, \
-	.smart_dma_priority = sdma_pri, \
 	.scaler_blk = {.name = "scaler", \
-		.id = qseed_ver, \
+		.version = scaler_ver, \
 		.base = 0xa00, .len = 0xa0,}, \
 	.csc_blk = {.name = "csc", \
-		.id = DPU_SSPP_CSC_10BIT, \
 		.base = 0x1a00, .len = 0x100,}, \
 	.format_list = plane_formats_yuv, \
 	.num_formats = ARRAY_SIZE(plane_formats_yuv), \
@@ -286,91 +310,64 @@ static const uint32_t wb2_formats[] = {
 	.rotation_cfg = rot_cfg, \
 	}
 
-#define _DMA_SBLK(sdma_pri) \
+#define _VIG_SBLK_NOSCALE() \
+	{ \
+	.maxdwnscale = SSPP_UNITY_SCALE, \
+	.maxupscale = SSPP_UNITY_SCALE, \
+	.format_list = plane_formats_yuv, \
+	.num_formats = ARRAY_SIZE(plane_formats_yuv), \
+	.virt_format_list = plane_formats, \
+	.virt_num_formats = ARRAY_SIZE(plane_formats), \
+	}
+
+#define _DMA_SBLK() \
 	{ \
 	.maxdwnscale = SSPP_UNITY_SCALE, \
 	.maxupscale = SSPP_UNITY_SCALE, \
-	.smart_dma_priority = sdma_pri, \
 	.format_list = plane_formats, \
 	.num_formats = ARRAY_SIZE(plane_formats), \
 	.virt_format_list = plane_formats, \
 	.virt_num_formats = ARRAY_SIZE(plane_formats), \
 	}
 
-static const struct dpu_sspp_sub_blks msm8998_vig_sblk_0 =
-				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3);
-static const struct dpu_sspp_sub_blks msm8998_vig_sblk_1 =
-				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3);
-static const struct dpu_sspp_sub_blks msm8998_vig_sblk_2 =
-				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3);
-static const struct dpu_sspp_sub_blks msm8998_vig_sblk_3 =
-				_VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3);
-
 static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = {
 	.rot_maxheight = 1088,
 	.rot_num_formats = ARRAY_SIZE(rotation_v2_formats),
 	.rot_format_list = rotation_v2_formats,
 };
 
-static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 =
-				_VIG_SBLK(5, DPU_SSPP_SCALER_QSEED3);
-static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 =
-				_VIG_SBLK(6, DPU_SSPP_SCALER_QSEED3);
-static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 =
-				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED3);
-static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 =
-				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED3);
-
-static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK(1);
-static const struct dpu_sspp_sub_blks sdm845_dma_sblk_1 = _DMA_SBLK(2);
-static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = _DMA_SBLK(3);
-static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK(4);
-
-static const struct dpu_sspp_sub_blks sc7180_vig_sblk_0 =
-				_VIG_SBLK(4, DPU_SSPP_SCALER_QSEED4);
-
-static const struct dpu_sspp_sub_blks sc7280_vig_sblk_0 =
-			_VIG_SBLK_ROT(4, DPU_SSPP_SCALER_QSEED4, &dpu_rot_sc7280_cfg_v2);
-
-static const struct dpu_sspp_sub_blks sm6115_vig_sblk_0 =
-				_VIG_SBLK(2, DPU_SSPP_SCALER_QSEED4);
-
-static const struct dpu_sspp_sub_blks sm6125_vig_sblk_0 =
-				_VIG_SBLK(3, DPU_SSPP_SCALER_QSEED3LITE);
-
-static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 =
-				_VIG_SBLK(5, DPU_SSPP_SCALER_QSEED4);
-static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 =
-				_VIG_SBLK(6, DPU_SSPP_SCALER_QSEED4);
-static const struct dpu_sspp_sub_blks sm8250_vig_sblk_2 =
-				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4);
-static const struct dpu_sspp_sub_blks sm8250_vig_sblk_3 =
-				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4);
-
-static const struct dpu_sspp_sub_blks sm8550_vig_sblk_0 =
-				_VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4);
-static const struct dpu_sspp_sub_blks sm8550_vig_sblk_1 =
-				_VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4);
-static const struct dpu_sspp_sub_blks sm8550_vig_sblk_2 =
-				_VIG_SBLK(9, DPU_SSPP_SCALER_QSEED4);
-static const struct dpu_sspp_sub_blks sm8550_vig_sblk_3 =
-				_VIG_SBLK(10, DPU_SSPP_SCALER_QSEED4);
-static const struct dpu_sspp_sub_blks sm8550_dma_sblk_4 = _DMA_SBLK(5);
-static const struct dpu_sspp_sub_blks sm8550_dma_sblk_5 = _DMA_SBLK(6);
-
-#define _VIG_SBLK_NOSCALE(sdma_pri) \
-	{ \
-	.maxdwnscale = SSPP_UNITY_SCALE, \
-	.maxupscale = SSPP_UNITY_SCALE, \
-	.smart_dma_priority = sdma_pri, \
-	.format_list = plane_formats_yuv, \
-	.num_formats = ARRAY_SIZE(plane_formats_yuv), \
-	.virt_format_list = plane_formats, \
-	.virt_num_formats = ARRAY_SIZE(plane_formats), \
-	}
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_noscale =
+				_VIG_SBLK_NOSCALE();
+
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_1_2 =
+				_VIG_SBLK(SSPP_SCALER_VER(1, 2));
+
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_1_3 =
+				_VIG_SBLK(SSPP_SCALER_VER(1, 3));
+
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_1_4 =
+				_VIG_SBLK(SSPP_SCALER_VER(1, 4));
+
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_2_4 =
+				_VIG_SBLK(SSPP_SCALER_VER(2, 4));
+
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_0 =
+				_VIG_SBLK(SSPP_SCALER_VER(3, 0));
+
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_0_rot_v2 =
+			_VIG_SBLK_ROT(SSPP_SCALER_VER(3, 0),
+				      &dpu_rot_sc7280_cfg_v2);
+
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_1 =
+				_VIG_SBLK(SSPP_SCALER_VER(3, 1));
+
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_2 =
+				_VIG_SBLK(SSPP_SCALER_VER(3, 2));
+
+static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_3 =
+				_VIG_SBLK(SSPP_SCALER_VER(3, 3));
 
-static const struct dpu_sspp_sub_blks qcm2290_vig_sblk_0 = _VIG_SBLK_NOSCALE(2);
-static const struct dpu_sspp_sub_blks qcm2290_dma_sblk_0 = _DMA_SBLK(1);
+static const struct dpu_sspp_sub_blks dpu_dma_sblk = _DMA_SBLK();
 
 /*************************************************************
  * MIXER sub blocks config
@@ -422,12 +419,12 @@ static const struct dpu_lm_sub_blks qcm2290_lm_sblk = {
  * DSPP sub blocks config
  *************************************************************/
 static const struct dpu_dspp_sub_blks msm8998_dspp_sblk = {
-	.pcc = {.name = "pcc", .id = DPU_DSPP_PCC, .base = 0x1700,
+	.pcc = {.name = "pcc", .base = 0x1700,
 		.len = 0x90, .version = 0x10007},
 };
 
 static const struct dpu_dspp_sub_blks sdm845_dspp_sblk = {
-	.pcc = {.name = "pcc", .id = DPU_DSPP_PCC, .base = 0x1700,
+	.pcc = {.name = "pcc", .base = 0x1700,
 		.len = 0x90, .version = 0x40000},
 };
 
@@ -435,19 +432,19 @@ static const struct dpu_dspp_sub_blks sdm845_dspp_sblk = {
  * PINGPONG sub blocks config
  *************************************************************/
 static const struct dpu_pingpong_sub_blks sdm845_pp_sblk_te = {
-	.te2 = {.name = "te2", .id = DPU_PINGPONG_TE2, .base = 0x2000, .len = 0x0,
+	.te2 = {.name = "te2", .base = 0x2000, .len = 0x0,
 		.version = 0x1},
-	.dither = {.name = "dither", .id = DPU_PINGPONG_DITHER, .base = 0x30e0,
+	.dither = {.name = "dither", .base = 0x30e0,
 		.len = 0x20, .version = 0x10000},
 };
 
 static const struct dpu_pingpong_sub_blks sdm845_pp_sblk = {
-	.dither = {.name = "dither", .id = DPU_PINGPONG_DITHER, .base = 0x30e0,
+	.dither = {.name = "dither", .base = 0x30e0,
 		.len = 0x20, .version = 0x10000},
 };
 
 static const struct dpu_pingpong_sub_blks sc7280_pp_sblk = {
-	.dither = {.name = "dither", .id = DPU_PINGPONG_DITHER, .base = 0xe0,
+	.dither = {.name = "dither", .base = 0xe0,
 	.len = 0x20, .version = 0x20000},
 };
 
@@ -465,6 +462,16 @@ static const struct dpu_dsc_sub_blks dsc_sblk_1 = {
 };
 
 /*************************************************************
+ * CDM block config
+ *************************************************************/
+static const struct dpu_cdm_cfg sc7280_cdm = {
+	.name = "cdm_0",
+	.id = CDM_0,
+	.len = 0x228,
+	.base = 0x79200,
+};
+
+/*************************************************************
  * VBIF sub blocks config
  *************************************************************/
 /* VBIF QOS remap */
@@ -472,6 +479,7 @@ static const u32 msm8998_rt_pri_lvl[] = {1, 2, 2, 2};
 static const u32 msm8998_nrt_pri_lvl[] = {1, 1, 1, 1};
 static const u32 sdm845_rt_pri_lvl[] = {3, 3, 4, 4, 5, 5, 6, 6};
 static const u32 sdm845_nrt_pri_lvl[] = {3, 3, 3, 3, 3, 3, 3, 3};
+static const u32 sm8650_rt_pri_lvl[] = {4, 4, 5, 5, 5, 5, 5, 6};
 
 static const struct dpu_vbif_dynamic_ot_cfg msm8998_ot_rdwr_cfg[] = {
 	{
@@ -558,6 +566,26 @@ static const struct dpu_vbif_cfg sm8550_vbif[] = {
 	},
 };
 
+static const struct dpu_vbif_cfg sm8650_vbif[] = {
+	{
+	.name = "vbif_rt", .id = VBIF_RT,
+	.base = 0, .len = 0x1074,
+	.features = BIT(DPU_VBIF_QOS_REMAP),
+	.xin_halt_timeout = 0x4000,
+	.qos_rp_remap_size = 0x40,
+	.qos_rt_tbl = {
+		.npriority_lvl = ARRAY_SIZE(sm8650_rt_pri_lvl),
+		.priority_lvl = sm8650_rt_pri_lvl,
+		},
+	.qos_nrt_tbl = {
+		.npriority_lvl = ARRAY_SIZE(sdm845_nrt_pri_lvl),
+		.priority_lvl = sdm845_nrt_pri_lvl,
+		},
+	.memtype_count = 16,
+	.memtype = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
+	},
+};
+
 /*************************************************************
  * PERF data config
  *************************************************************/
@@ -654,6 +682,7 @@ static const struct dpu_qos_lut_entry sc7180_qos_nrt[] = {
 #include "catalog/dpu_3_0_msm8998.h"
 
 #include "catalog/dpu_4_0_sdm845.h"
+#include "catalog/dpu_4_1_sdm670.h"
 
 #include "catalog/dpu_5_0_sm8150.h"
 #include "catalog/dpu_5_1_sc8180x.h"
@@ -673,3 +702,5 @@ static const struct dpu_qos_lut_entry sc7180_qos_nrt[] = {
 #include "catalog/dpu_8_1_sm8450.h"
 
 #include "catalog/dpu_9_0_sm8550.h"
+
+#include "catalog/dpu_10_0_sm8650.h"
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index df024e10d3a3..ba82ef4560a6 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -51,9 +51,7 @@ enum {
 /**
  * SSPP sub-blocks/features
  * @DPU_SSPP_SCALER_QSEED2,  QSEED2 algorithm support
- * @DPU_SSPP_SCALER_QSEED3,  QSEED3 alogorithm support
- * @DPU_SSPP_SCALER_QSEED3LITE,  QSEED3 Lite alogorithm support
- * @DPU_SSPP_SCALER_QSEED4,  QSEED4 algorithm support
+ * @DPU_SSPP_SCALER_QSEED3_COMPATIBLE,  QSEED3-compatible alogorithm support (includes QSEED3, QSEED3LITE and QSEED4)
  * @DPU_SSPP_SCALER_RGB,     RGB Scaler, supported by RGB pipes
  * @DPU_SSPP_CSC,            Support of Color space converion
  * @DPU_SSPP_CSC_10BIT,      Support of 10-bit Color space conversion
@@ -71,9 +69,7 @@ enum {
  */
 enum {
 	DPU_SSPP_SCALER_QSEED2 = 0x1,
-	DPU_SSPP_SCALER_QSEED3,
-	DPU_SSPP_SCALER_QSEED3LITE,
-	DPU_SSPP_SCALER_QSEED4,
+	DPU_SSPP_SCALER_QSEED3_COMPATIBLE,
 	DPU_SSPP_SCALER_RGB,
 	DPU_SSPP_CSC,
 	DPU_SSPP_CSC_10BIT,
@@ -249,49 +245,50 @@ enum {
 	unsigned long features
 
 /**
- * MACRO DPU_HW_SUBBLK_INFO - information of HW sub-block inside DPU
- * @name:              string name for debug purposes
- * @id:                enum identifying this sub-block
- * @base:              offset of this sub-block relative to the block
- *                     offset
- * @len                register block length of this sub-block
- */
-#define DPU_HW_SUBBLK_INFO \
-	char name[DPU_HW_BLK_NAME_LEN]; \
-	u32 id; \
-	u32 base; \
-	u32 len
-
-/**
  * struct dpu_scaler_blk: Scaler information
- * @info:   HW register and features supported by this sub-blk
- * @version: qseed block revision
+ * @name: string name for debug purposes
+ * @base: offset of this sub-block relative to the block offset
+ * @len: register block length of this sub-block
+ * @version: qseed block revision, on QSEED3+ platforms this is the value of
+ *           scaler_blk.base + QSEED3_HW_VERSION registers.
  */
 struct dpu_scaler_blk {
-	DPU_HW_SUBBLK_INFO;
+	char name[DPU_HW_BLK_NAME_LEN];
+	u32 base;
+	u32 len;
 	u32 version;
 };
 
 struct dpu_csc_blk {
-	DPU_HW_SUBBLK_INFO;
+	char name[DPU_HW_BLK_NAME_LEN];
+	u32 base;
+	u32 len;
 };
 
 /**
  * struct dpu_pp_blk : Pixel processing sub-blk information
- * @info:   HW register and features supported by this sub-blk
+ * @name: string name for debug purposes
+ * @base: offset of this sub-block relative to the block offset
+ * @len: register block length of this sub-block
  * @version: HW Algorithm version
  */
 struct dpu_pp_blk {
-	DPU_HW_SUBBLK_INFO;
+	char name[DPU_HW_BLK_NAME_LEN];
+	u32 base;
+	u32 len;
 	u32 version;
 };
 
 /**
  * struct dpu_dsc_blk - DSC Encoder sub-blk information
- * @info:   HW register and features supported by this sub-blk
+ * @name: string name for debug purposes
+ * @base: offset of this sub-block relative to the block offset
+ * @len: register block length of this sub-block
  */
 struct dpu_dsc_blk {
-	DPU_HW_SUBBLK_INFO;
+	char name[DPU_HW_BLK_NAME_LEN];
+	u32 base;
+	u32 len;
 };
 
 /**
@@ -341,7 +338,6 @@ struct dpu_rotation_cfg {
  * @max_mixer_width    max layer mixer line width support.
  * @max_mixer_blendstages max layer mixer blend stages or
  *                       supported z order
- * @qseed_type         qseed2 or qseed3 support.
  * @has_src_split      source split feature status
  * @has_dim_layer      dim layer feature status
  * @has_idle_pc        indicate if idle power collapse feature is supported
@@ -354,7 +350,6 @@ struct dpu_rotation_cfg {
 struct dpu_caps {
 	u32 max_mixer_width;
 	u32 max_mixer_blendstages;
-	u32 qseed_type;
 	bool has_src_split;
 	bool has_dim_layer;
 	bool has_idle_pc;
@@ -371,7 +366,6 @@ struct dpu_caps {
  * common: Pointer to common configurations shared by sub blocks
  * @maxdwnscale: max downscale ratio supported(without DECIMATION)
  * @maxupscale:  maxupscale ratio supported
- * @smart_dma_priority: hw priority of rect1 of multirect pipe
  * @max_per_pipe_bw: maximum allowable bandwidth of this pipe in kBps
  * @qseed_ver: qseed version
  * @scaler_blk:
@@ -385,7 +379,6 @@ struct dpu_caps {
 struct dpu_sspp_sub_blks {
 	u32 maxdwnscale;
 	u32 maxupscale;
-	u32 smart_dma_priority;
 	u32 max_per_pipe_bw;
 	u32 qseed_ver;
 	struct dpu_scaler_blk scaler_blk;
@@ -690,6 +683,17 @@ struct dpu_vbif_cfg {
 };
 
 /**
+ * struct dpu_cdm_cfg - information of chroma down blocks
+ * @name               string name for debug purposes
+ * @id                 enum identifying this block
+ * @base               register offset of this block
+ * @features           bit mask identifying sub-blocks/features
+ */
+struct dpu_cdm_cfg {
+	DPU_HW_BLK_INFO;
+};
+
+/**
  * Define CDP use cases
  * @DPU_PERF_CDP_UDAGE_RT: real-time use cases
  * @DPU_PERF_CDP_USAGE_NRT: non real-time use cases such as WFD
@@ -812,6 +816,8 @@ struct dpu_mdss_cfg {
 	u32 wb_count;
 	const struct dpu_wb_cfg *wb;
 
+	const struct dpu_cdm_cfg *cdm;
+
 	u32 ad_count;
 
 	u32 dspp_count;
@@ -827,6 +833,7 @@ struct dpu_mdss_cfg {
 
 extern const struct dpu_mdss_cfg dpu_msm8998_cfg;
 extern const struct dpu_mdss_cfg dpu_sdm845_cfg;
+extern const struct dpu_mdss_cfg dpu_sdm670_cfg;
 extern const struct dpu_mdss_cfg dpu_sm8150_cfg;
 extern const struct dpu_mdss_cfg dpu_sc8180x_cfg;
 extern const struct dpu_mdss_cfg dpu_sm8250_cfg;
@@ -841,5 +848,6 @@ extern const struct dpu_mdss_cfg dpu_sc7280_cfg;
 extern const struct dpu_mdss_cfg dpu_sc8280xp_cfg;
 extern const struct dpu_mdss_cfg dpu_sm8450_cfg;
 extern const struct dpu_mdss_cfg dpu_sm8550_cfg;
+extern const struct dpu_mdss_cfg dpu_sm8650_cfg;
 
 #endif /* _DPU_HW_CATALOG_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c
new file mode 100644
index 000000000000..e9cdc7934a49
--- /dev/null
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/bitfield.h>
+
+#include <drm/drm_managed.h>
+
+#include "dpu_hw_mdss.h"
+#include "dpu_hw_util.h"
+#include "dpu_hw_catalog.h"
+#include "dpu_hw_cdm.h"
+#include "dpu_kms.h"
+
+#define CDM_CSC_10_OPMODE                  0x000
+#define CDM_CSC_10_BASE                    0x004
+
+#define CDM_CDWN2_OP_MODE                  0x100
+#define CDM_CDWN2_CLAMP_OUT                0x104
+#define CDM_CDWN2_PARAMS_3D_0              0x108
+#define CDM_CDWN2_PARAMS_3D_1              0x10C
+#define CDM_CDWN2_COEFF_COSITE_H_0         0x110
+#define CDM_CDWN2_COEFF_COSITE_H_1         0x114
+#define CDM_CDWN2_COEFF_COSITE_H_2         0x118
+#define CDM_CDWN2_COEFF_OFFSITE_H_0        0x11C
+#define CDM_CDWN2_COEFF_OFFSITE_H_1        0x120
+#define CDM_CDWN2_COEFF_OFFSITE_H_2        0x124
+#define CDM_CDWN2_COEFF_COSITE_V           0x128
+#define CDM_CDWN2_COEFF_OFFSITE_V          0x12C
+#define CDM_CDWN2_OUT_SIZE                 0x130
+
+#define CDM_HDMI_PACK_OP_MODE              0x200
+#define CDM_CSC_10_MATRIX_COEFF_0          0x004
+
+#define CDM_MUX                            0x224
+
+/* CDM CDWN2 sub-block bit definitions */
+#define CDM_CDWN2_OP_MODE_EN                  BIT(0)
+#define CDM_CDWN2_OP_MODE_ENABLE_H            BIT(1)
+#define CDM_CDWN2_OP_MODE_ENABLE_V            BIT(2)
+#define CDM_CDWN2_OP_MODE_BITS_OUT_8BIT       BIT(7)
+#define CDM_CDWN2_V_PIXEL_METHOD_MASK         GENMASK(6, 5)
+#define CDM_CDWN2_H_PIXEL_METHOD_MASK         GENMASK(4, 3)
+
+/* CDM CSC10 sub-block bit definitions */
+#define CDM_CSC10_OP_MODE_EN               BIT(0)
+#define CDM_CSC10_OP_MODE_SRC_FMT_YUV      BIT(1)
+#define CDM_CSC10_OP_MODE_DST_FMT_YUV      BIT(2)
+
+/* CDM HDMI pack sub-block bit definitions */
+#define CDM_HDMI_PACK_OP_MODE_EN           BIT(0)
+
+/*
+ * Horizontal coefficients for cosite chroma downscale
+ * s13 representation of coefficients
+ */
+static u32 cosite_h_coeff[] = {0x00000016, 0x000001cc, 0x0100009e};
+
+/*
+ * Horizontal coefficients for offsite chroma downscale
+ */
+static u32 offsite_h_coeff[] = {0x000b0005, 0x01db01eb, 0x00e40046};
+
+/*
+ * Vertical coefficients for cosite chroma downscale
+ */
+static u32 cosite_v_coeff[] = {0x00080004};
+/*
+ * Vertical coefficients for offsite chroma downscale
+ */
+static u32 offsite_v_coeff[] = {0x00060002};
+
+static int dpu_hw_cdm_setup_cdwn(struct dpu_hw_cdm *ctx, struct dpu_hw_cdm_cfg *cfg)
+{
+	struct dpu_hw_blk_reg_map *c = &ctx->hw;
+	u32 opmode;
+	u32 out_size;
+
+	switch (cfg->h_cdwn_type) {
+	case CDM_CDWN_DISABLE:
+		opmode = 0;
+		break;
+	case CDM_CDWN_PIXEL_DROP:
+		opmode = CDM_CDWN2_OP_MODE_ENABLE_H |
+				FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_PIXEL_DROP);
+		break;
+	case CDM_CDWN_AVG:
+		opmode = CDM_CDWN2_OP_MODE_ENABLE_H |
+				FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_AVG);
+		break;
+	case CDM_CDWN_COSITE:
+		opmode = CDM_CDWN2_OP_MODE_ENABLE_H |
+				FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_COSITE);
+		DPU_REG_WRITE(c, CDM_CDWN2_COEFF_COSITE_H_0,
+			      cosite_h_coeff[0]);
+		DPU_REG_WRITE(c, CDM_CDWN2_COEFF_COSITE_H_1,
+			      cosite_h_coeff[1]);
+		DPU_REG_WRITE(c, CDM_CDWN2_COEFF_COSITE_H_2,
+			      cosite_h_coeff[2]);
+		break;
+	case CDM_CDWN_OFFSITE:
+		opmode = CDM_CDWN2_OP_MODE_ENABLE_H |
+				FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK, CDM_CDWN2_METHOD_OFFSITE);
+		DPU_REG_WRITE(c, CDM_CDWN2_COEFF_OFFSITE_H_0,
+			      offsite_h_coeff[0]);
+		DPU_REG_WRITE(c, CDM_CDWN2_COEFF_OFFSITE_H_1,
+			      offsite_h_coeff[1]);
+		DPU_REG_WRITE(c, CDM_CDWN2_COEFF_OFFSITE_H_2,
+			      offsite_h_coeff[2]);
+		break;
+	default:
+		DPU_ERROR("%s invalid horz down sampling type\n", __func__);
+		return -EINVAL;
+	}
+
+	switch (cfg->v_cdwn_type) {
+	case CDM_CDWN_DISABLE:
+		/* if its only Horizontal downsample, we dont need to do anything here */
+		break;
+	case CDM_CDWN_PIXEL_DROP:
+		opmode |= CDM_CDWN2_OP_MODE_ENABLE_V |
+				FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_PIXEL_DROP);
+		break;
+	case CDM_CDWN_AVG:
+		opmode |= CDM_CDWN2_OP_MODE_ENABLE_V |
+				FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_AVG);
+		break;
+	case CDM_CDWN_COSITE:
+		opmode |= CDM_CDWN2_OP_MODE_ENABLE_V |
+				FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_COSITE);
+		DPU_REG_WRITE(c,
+			      CDM_CDWN2_COEFF_COSITE_V,
+			      cosite_v_coeff[0]);
+		break;
+	case CDM_CDWN_OFFSITE:
+		opmode |= CDM_CDWN2_OP_MODE_ENABLE_V |
+				FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK,
+					   CDM_CDWN2_METHOD_OFFSITE);
+		DPU_REG_WRITE(c,
+			      CDM_CDWN2_COEFF_OFFSITE_V,
+			      offsite_v_coeff[0]);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (cfg->output_bit_depth != CDM_CDWN_OUTPUT_10BIT)
+		opmode |= CDM_CDWN2_OP_MODE_BITS_OUT_8BIT;
+
+	if (cfg->v_cdwn_type || cfg->h_cdwn_type)
+		opmode |= CDM_CDWN2_OP_MODE_EN; /* EN CDWN module */
+	else
+		opmode &= ~CDM_CDWN2_OP_MODE_EN;
+
+	out_size = (cfg->output_width & 0xFFFF) | ((cfg->output_height & 0xFFFF) << 16);
+	DPU_REG_WRITE(c, CDM_CDWN2_OUT_SIZE, out_size);
+	DPU_REG_WRITE(c, CDM_CDWN2_OP_MODE, opmode);
+	DPU_REG_WRITE(c, CDM_CDWN2_CLAMP_OUT, ((0x3FF << 16) | 0x0));
+
+	return 0;
+}
+
+static int dpu_hw_cdm_enable(struct dpu_hw_cdm *ctx, struct dpu_hw_cdm_cfg *cdm)
+{
+	struct dpu_hw_blk_reg_map *c = &ctx->hw;
+	const struct dpu_format *fmt;
+	u32 opmode = 0;
+	u32 csc = 0;
+
+	if (!ctx || !cdm)
+		return -EINVAL;
+
+	fmt = cdm->output_fmt;
+
+	if (!DPU_FORMAT_IS_YUV(fmt))
+		return -EINVAL;
+
+	dpu_hw_csc_setup(&ctx->hw, CDM_CSC_10_MATRIX_COEFF_0, cdm->csc_cfg, true);
+	dpu_hw_cdm_setup_cdwn(ctx, cdm);
+
+	if (cdm->output_type == CDM_CDWN_OUTPUT_HDMI) {
+		if (fmt->chroma_sample != DPU_CHROMA_H1V2)
+			return -EINVAL; /*unsupported format */
+		opmode = CDM_HDMI_PACK_OP_MODE_EN;
+		opmode |= (fmt->chroma_sample << 1);
+	}
+
+	csc |= CDM_CSC10_OP_MODE_DST_FMT_YUV;
+	csc &= ~CDM_CSC10_OP_MODE_SRC_FMT_YUV;
+	csc |= CDM_CSC10_OP_MODE_EN;
+
+	if (ctx && ctx->ops.bind_pingpong_blk)
+		ctx->ops.bind_pingpong_blk(ctx, cdm->pp_id);
+
+	DPU_REG_WRITE(c, CDM_CSC_10_OPMODE, csc);
+	DPU_REG_WRITE(c, CDM_HDMI_PACK_OP_MODE, opmode);
+	return 0;
+}
+
+static void dpu_hw_cdm_bind_pingpong_blk(struct dpu_hw_cdm *ctx, const enum dpu_pingpong pp)
+{
+	struct dpu_hw_blk_reg_map *c;
+	int mux_cfg;
+
+	c = &ctx->hw;
+
+	mux_cfg = DPU_REG_READ(c, CDM_MUX);
+	mux_cfg &= ~0xf;
+
+	if (pp)
+		mux_cfg |= (pp - PINGPONG_0) & 0x7;
+	else
+		mux_cfg |= 0xf;
+
+	DPU_REG_WRITE(c, CDM_MUX, mux_cfg);
+}
+
+struct dpu_hw_cdm *dpu_hw_cdm_init(struct drm_device *dev,
+				   const struct dpu_cdm_cfg *cfg, void __iomem *addr,
+				   const struct dpu_mdss_version *mdss_rev)
+{
+	struct dpu_hw_cdm *c;
+
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
+	if (!c)
+		return ERR_PTR(-ENOMEM);
+
+	c->hw.blk_addr = addr + cfg->base;
+	c->hw.log_mask = DPU_DBG_MASK_CDM;
+
+	/* Assign ops */
+	c->idx = cfg->id;
+	c->caps = cfg;
+
+	c->ops.enable = dpu_hw_cdm_enable;
+	if (mdss_rev->core_major_ver >= 5)
+		c->ops.bind_pingpong_blk = dpu_hw_cdm_bind_pingpong_blk;
+
+	return c;
+}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h
new file mode 100644
index 000000000000..348424df87c6
--- /dev/null
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2023, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DPU_HW_CDM_H
+#define _DPU_HW_CDM_H
+
+#include "dpu_hw_mdss.h"
+#include "dpu_hw_top.h"
+
+struct dpu_hw_cdm;
+
+/**
+ * struct dpu_hw_cdm_cfg : current configuration of CDM block
+ *
+ *  @output_width:         output ROI width of CDM block
+ *  @output_height:        output ROI height of CDM block
+ *  @output_bit_depth:     output bit-depth of CDM block
+ *  @h_cdwn_type:          downsample type used for horizontal pixels
+ *  @v_cdwn_type:          downsample type used for vertical pixels
+ *  @output_fmt:           handle to dpu_format of CDM block
+ *  @csc_cfg:              handle to CSC matrix programmed for CDM block
+ *  @output_type:          interface to which CDM is paired (HDMI/WB)
+ *  @pp_id:                ping-pong block to which CDM is bound to
+ */
+struct dpu_hw_cdm_cfg {
+	u32 output_width;
+	u32 output_height;
+	u32 output_bit_depth;
+	u32 h_cdwn_type;
+	u32 v_cdwn_type;
+	const struct dpu_format *output_fmt;
+	const struct dpu_csc_cfg *csc_cfg;
+	u32 output_type;
+	int pp_id;
+};
+
+/*
+ * These values are used indicate which type of downsample is used
+ * in the horizontal/vertical direction for the CDM block.
+ */
+enum dpu_hw_cdwn_type {
+	CDM_CDWN_DISABLE,
+	CDM_CDWN_PIXEL_DROP,
+	CDM_CDWN_AVG,
+	CDM_CDWN_COSITE,
+	CDM_CDWN_OFFSITE,
+};
+
+/*
+ * CDM block can be paired with WB or HDMI block. These values match
+ * the input with which the CDM block is paired.
+ */
+enum dpu_hw_cdwn_output_type {
+	CDM_CDWN_OUTPUT_HDMI,
+	CDM_CDWN_OUTPUT_WB,
+};
+
+/*
+ * CDM block can give an 8-bit or 10-bit output. These values
+ * are used to indicate the output bit depth of CDM block
+ */
+enum dpu_hw_cdwn_output_bit_depth {
+	CDM_CDWN_OUTPUT_8BIT,
+	CDM_CDWN_OUTPUT_10BIT,
+};
+
+/*
+ * CDM block can downsample using different methods. These values
+ * are used to indicate the downsample method which can be used
+ * either in the horizontal or vertical direction.
+ */
+enum dpu_hw_cdwn_op_mode_method_h_v {
+	CDM_CDWN2_METHOD_PIXEL_DROP,
+	CDM_CDWN2_METHOD_AVG,
+	CDM_CDWN2_METHOD_COSITE,
+	CDM_CDWN2_METHOD_OFFSITE
+};
+
+/**
+ * struct dpu_hw_cdm_ops : Interface to the chroma down Hw driver functions
+ *                         Assumption is these functions will be called after
+ *                         clocks are enabled
+ *  @enable:               Enables the output to interface and programs the
+ *                         output packer
+ *  @bind_pingpong_blk:    enable/disable the connection with pingpong which
+ *                         will feed pixels to this cdm
+ */
+struct dpu_hw_cdm_ops {
+	/**
+	 * Enable the CDM module
+	 * @cdm         Pointer to chroma down context
+	 */
+	int (*enable)(struct dpu_hw_cdm *cdm, struct dpu_hw_cdm_cfg *cfg);
+
+	/**
+	 * Enable/disable the connection with pingpong
+	 * @cdm         Pointer to chroma down context
+	 * @pp          pingpong block id.
+	 */
+	void (*bind_pingpong_blk)(struct dpu_hw_cdm *cdm, const enum dpu_pingpong pp);
+};
+
+/**
+ * struct dpu_hw_cdm - cdm description
+ * @base: Hardware block base structure
+ * @hw: Block hardware details
+ * @idx: CDM index
+ * @caps: Pointer to cdm_cfg
+ * @ops: handle to operations possible for this CDM
+ */
+struct dpu_hw_cdm {
+	struct dpu_hw_blk base;
+	struct dpu_hw_blk_reg_map hw;
+
+	/* chroma down */
+	const struct dpu_cdm_cfg *caps;
+	enum  dpu_cdm  idx;
+
+	/* ops */
+	struct dpu_hw_cdm_ops ops;
+};
+
+/**
+ * dpu_hw_cdm_init - initializes the cdm hw driver object.
+ * should be called once before accessing every cdm.
+ * @dev: DRM device handle
+ * @cdm: CDM catalog entry for which driver object is required
+ * @addr :   mapped register io address of MDSS
+ * @mdss_rev: mdss hw core revision
+ */
+struct dpu_hw_cdm *dpu_hw_cdm_init(struct drm_device *dev,
+				   const struct dpu_cdm_cfg *cdm, void __iomem *addr,
+				   const struct dpu_mdss_version *mdss_rev);
+
+static inline struct dpu_hw_cdm *to_dpu_hw_cdm(struct dpu_hw_blk *hw)
+{
+	return container_of(hw, struct dpu_hw_cdm, base);
+}
+
+#endif /*_DPU_HW_CDM_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
index 86182c734606..e76565c3e6a4 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
@@ -4,6 +4,9 @@
  */
 
 #include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+
 #include "dpu_hwio.h"
 #include "dpu_hw_ctl.h"
 #include "dpu_kms.h"
@@ -29,11 +32,13 @@
 #define   CTL_DSC_ACTIVE                0x0E8
 #define   CTL_WB_ACTIVE                 0x0EC
 #define   CTL_INTF_ACTIVE               0x0F4
+#define   CTL_CDM_ACTIVE                0x0F8
 #define   CTL_FETCH_PIPE_ACTIVE         0x0FC
 #define   CTL_MERGE_3D_FLUSH            0x100
 #define   CTL_DSC_FLUSH                0x104
 #define   CTL_WB_FLUSH                  0x108
 #define   CTL_INTF_FLUSH                0x110
+#define   CTL_CDM_FLUSH                0x114
 #define   CTL_INTF_MASTER               0x134
 #define   CTL_DSPP_n_FLUSH(n)           ((0x13C) + ((n) * 4))
 
@@ -43,6 +48,7 @@
 #define DPU_REG_RESET_TIMEOUT_US        2000
 #define  MERGE_3D_IDX   23
 #define  DSC_IDX        22
+#define CDM_IDX         26
 #define  INTF_IDX       31
 #define WB_IDX          16
 #define  DSPP_IDX       29  /* From DPU hw rev 7.x.x */
@@ -104,6 +110,7 @@ static inline void dpu_hw_ctl_clear_pending_flush(struct dpu_hw_ctl *ctx)
 	ctx->pending_wb_flush_mask = 0;
 	ctx->pending_merge_3d_flush_mask = 0;
 	ctx->pending_dsc_flush_mask = 0;
+	ctx->pending_cdm_flush_mask = 0;
 
 	memset(ctx->pending_dspp_flush_mask, 0,
 		sizeof(ctx->pending_dspp_flush_mask));
@@ -148,6 +155,10 @@ static inline void dpu_hw_ctl_trigger_flush_v1(struct dpu_hw_ctl *ctx)
 		DPU_REG_WRITE(&ctx->hw, CTL_DSC_FLUSH,
 			      ctx->pending_dsc_flush_mask);
 
+	if (ctx->pending_flush_mask & BIT(CDM_IDX))
+		DPU_REG_WRITE(&ctx->hw, CTL_CDM_FLUSH,
+			      ctx->pending_cdm_flush_mask);
+
 	DPU_REG_WRITE(&ctx->hw, CTL_FLUSH, ctx->pending_flush_mask);
 }
 
@@ -279,6 +290,13 @@ static void dpu_hw_ctl_update_pending_flush_wb(struct dpu_hw_ctl *ctx,
 	}
 }
 
+static void dpu_hw_ctl_update_pending_flush_cdm(struct dpu_hw_ctl *ctx, enum dpu_cdm cdm_num)
+{
+	/* update pending flush only if CDM_0 is flushed */
+	if (cdm_num == CDM_0)
+		ctx->pending_flush_mask |= BIT(CDM_IDX);
+}
+
 static void dpu_hw_ctl_update_pending_flush_wb_v1(struct dpu_hw_ctl *ctx,
 		enum dpu_wb wb)
 {
@@ -307,6 +325,12 @@ static void dpu_hw_ctl_update_pending_flush_dsc_v1(struct dpu_hw_ctl *ctx,
 	ctx->pending_flush_mask |= BIT(DSC_IDX);
 }
 
+static void dpu_hw_ctl_update_pending_flush_cdm_v1(struct dpu_hw_ctl *ctx, enum dpu_cdm cdm_num)
+{
+	ctx->pending_cdm_flush_mask |= BIT(cdm_num - CDM_0);
+	ctx->pending_flush_mask |= BIT(CDM_IDX);
+}
+
 static void dpu_hw_ctl_update_pending_flush_dspp(struct dpu_hw_ctl *ctx,
 	enum dpu_dspp dspp, u32 dspp_sub_blk)
 {
@@ -540,6 +564,9 @@ static void dpu_hw_ctl_intf_cfg_v1(struct dpu_hw_ctl *ctx,
 
 	if (cfg->dsc)
 		DPU_REG_WRITE(c, CTL_DSC_ACTIVE, cfg->dsc);
+
+	if (cfg->cdm)
+		DPU_REG_WRITE(c, CTL_CDM_ACTIVE, cfg->cdm);
 }
 
 static void dpu_hw_ctl_intf_cfg(struct dpu_hw_ctl *ctx,
@@ -583,6 +610,7 @@ static void dpu_hw_ctl_reset_intf_cfg_v1(struct dpu_hw_ctl *ctx,
 	u32 wb_active = 0;
 	u32 merge3d_active = 0;
 	u32 dsc_active;
+	u32 cdm_active;
 
 	/*
 	 * This API resets each portion of the CTL path namely,
@@ -618,6 +646,12 @@ static void dpu_hw_ctl_reset_intf_cfg_v1(struct dpu_hw_ctl *ctx,
 		dsc_active &= ~cfg->dsc;
 		DPU_REG_WRITE(c, CTL_DSC_ACTIVE, dsc_active);
 	}
+
+	if (cfg->cdm) {
+		cdm_active = DPU_REG_READ(c, CTL_CDM_ACTIVE);
+		cdm_active &= ~cfg->cdm;
+		DPU_REG_WRITE(c, CTL_CDM_ACTIVE, cdm_active);
+	}
 }
 
 static void dpu_hw_ctl_set_fetch_pipe_active(struct dpu_hw_ctl *ctx,
@@ -651,12 +685,14 @@ static void _setup_ctl_ops(struct dpu_hw_ctl_ops *ops,
 		ops->update_pending_flush_wb = dpu_hw_ctl_update_pending_flush_wb_v1;
 		ops->update_pending_flush_dsc =
 			dpu_hw_ctl_update_pending_flush_dsc_v1;
+		ops->update_pending_flush_cdm = dpu_hw_ctl_update_pending_flush_cdm_v1;
 	} else {
 		ops->trigger_flush = dpu_hw_ctl_trigger_flush;
 		ops->setup_intf_cfg = dpu_hw_ctl_intf_cfg;
 		ops->update_pending_flush_intf =
 			dpu_hw_ctl_update_pending_flush_intf;
 		ops->update_pending_flush_wb = dpu_hw_ctl_update_pending_flush_wb;
+		ops->update_pending_flush_cdm = dpu_hw_ctl_update_pending_flush_cdm;
 	}
 	ops->clear_pending_flush = dpu_hw_ctl_clear_pending_flush;
 	ops->update_pending_flush = dpu_hw_ctl_update_pending_flush;
@@ -680,14 +716,15 @@ static void _setup_ctl_ops(struct dpu_hw_ctl_ops *ops,
 		ops->set_active_pipes = dpu_hw_ctl_set_fetch_pipe_active;
 };
 
-struct dpu_hw_ctl *dpu_hw_ctl_init(const struct dpu_ctl_cfg *cfg,
-		void __iomem *addr,
-		u32 mixer_count,
-		const struct dpu_lm_cfg *mixer)
+struct dpu_hw_ctl *dpu_hw_ctl_init(struct drm_device *dev,
+				   const struct dpu_ctl_cfg *cfg,
+				   void __iomem *addr,
+				   u32 mixer_count,
+				   const struct dpu_lm_cfg *mixer)
 {
 	struct dpu_hw_ctl *c;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -702,8 +739,3 @@ struct dpu_hw_ctl *dpu_hw_ctl_init(const struct dpu_ctl_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_ctl_destroy(struct dpu_hw_ctl *ctx)
-{
-	kfree(ctx);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h
index 1c242298ff2e..ff85b5ee0acf 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h
@@ -39,6 +39,7 @@ struct dpu_hw_stage_cfg {
  * @mode_3d:               3d mux configuration
  * @merge_3d:              3d merge block used
  * @intf_mode_sel:         Interface mode, cmd / vid
+ * @cdm:                   CDM block used
  * @stream_sel:            Stream selection for multi-stream interfaces
  * @dsc:                   DSC BIT masks used
  */
@@ -48,6 +49,7 @@ struct dpu_hw_intf_cfg {
 	enum dpu_3d_blend_mode mode_3d;
 	enum dpu_merge_3d merge_3d;
 	enum dpu_ctl_mode_sel intf_mode_sel;
+	enum dpu_cdm cdm;
 	int stream_sel;
 	unsigned int dsc;
 };
@@ -167,6 +169,14 @@ struct dpu_hw_ctl_ops {
 					 enum dpu_dsc blk);
 
 	/**
+	 * OR in the given flushbits to the cached pending_(cdm_)flush_mask
+	 * No effect on hardware
+	 * @ctx: ctl path ctx pointer
+	 * @cdm_num: idx of cdm to be flushed
+	 */
+	void (*update_pending_flush_cdm)(struct dpu_hw_ctl *ctx, enum dpu_cdm cdm_num);
+
+	/**
 	 * Write the value of the pending_flush_mask to hardware
 	 * @ctx       : ctl path ctx pointer
 	 */
@@ -239,6 +249,7 @@ struct dpu_hw_ctl_ops {
  * @pending_intf_flush_mask: pending INTF flush
  * @pending_wb_flush_mask: pending WB flush
  * @pending_dsc_flush_mask: pending DSC flush
+ * @pending_cdm_flush_mask: pending CDM flush
  * @ops: operation list
  */
 struct dpu_hw_ctl {
@@ -256,6 +267,7 @@ struct dpu_hw_ctl {
 	u32 pending_merge_3d_flush_mask;
 	u32 pending_dspp_flush_mask[DSPP_MAX - DSPP_0];
 	u32 pending_dsc_flush_mask;
+	u32 pending_cdm_flush_mask;
 
 	/* ops */
 	struct dpu_hw_ctl_ops ops;
@@ -274,20 +286,16 @@ static inline struct dpu_hw_ctl *to_dpu_hw_ctl(struct dpu_hw_blk *hw)
 /**
  * dpu_hw_ctl_init() - Initializes the ctl_path hw driver object.
  * Should be called before accessing any ctl_path register.
+ * @dev:  Corresponding device for devres management
  * @cfg:  ctl_path catalog entry for which driver object is required
  * @addr: mapped register io address of MDP
  * @mixer_count: Number of mixers in @mixer
  * @mixer: Pointer to an array of Layer Mixers defined in the catalog
  */
-struct dpu_hw_ctl *dpu_hw_ctl_init(const struct dpu_ctl_cfg *cfg,
-		void __iomem *addr,
-		u32 mixer_count,
-		const struct dpu_lm_cfg *mixer);
-
-/**
- * dpu_hw_ctl_destroy(): Destroys ctl driver context
- * should be called to free the context
- */
-void dpu_hw_ctl_destroy(struct dpu_hw_ctl *ctx);
+struct dpu_hw_ctl *dpu_hw_ctl_init(struct drm_device *dev,
+				   const struct dpu_ctl_cfg *cfg,
+				   void __iomem *addr,
+				   u32 mixer_count,
+				   const struct dpu_lm_cfg *mixer);
 
 #endif /*_DPU_HW_CTL_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
index 509dbaa51d87..5e9aad1b2aa2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
@@ -3,6 +3,8 @@
  * Copyright (c) 2020-2022, Linaro Limited
  */
 
+#include <drm/drm_managed.h>
+
 #include <drm/display/drm_dsc_helper.h>
 
 #include "dpu_kms.h"
@@ -188,12 +190,13 @@ static void _setup_dsc_ops(struct dpu_hw_dsc_ops *ops,
 		ops->dsc_bind_pingpong_blk = dpu_hw_dsc_bind_pingpong_blk;
 };
 
-struct dpu_hw_dsc *dpu_hw_dsc_init(const struct dpu_dsc_cfg *cfg,
+struct dpu_hw_dsc *dpu_hw_dsc_init(struct drm_device *dev,
+				   const struct dpu_dsc_cfg *cfg,
 				   void __iomem *addr)
 {
 	struct dpu_hw_dsc *c;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -206,8 +209,3 @@ struct dpu_hw_dsc *dpu_hw_dsc_init(const struct dpu_dsc_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_dsc_destroy(struct dpu_hw_dsc *dsc)
-{
-	kfree(dsc);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h
index d5b597ab8c5c..989c88d2449b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h
@@ -64,20 +64,24 @@ struct dpu_hw_dsc {
 
 /**
  * dpu_hw_dsc_init() - Initializes the DSC hw driver object.
+ * @dev:  Corresponding device for devres management
  * @cfg:  DSC catalog entry for which driver object is required
  * @addr: Mapped register io address of MDP
  * Return: Error code or allocated dpu_hw_dsc context
  */
-struct dpu_hw_dsc *dpu_hw_dsc_init(const struct dpu_dsc_cfg *cfg,
-		void __iomem *addr);
+struct dpu_hw_dsc *dpu_hw_dsc_init(struct drm_device *dev,
+				   const struct dpu_dsc_cfg *cfg,
+				   void __iomem *addr);
 
 /**
  * dpu_hw_dsc_init_1_2() - initializes the v1.2 DSC hw driver object
+ * @dev:  Corresponding device for devres management
  * @cfg:  DSC catalog entry for which driver object is required
  * @addr: Mapped register io address of MDP
  * Returns: Error code or allocated dpu_hw_dsc context
  */
-struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(const struct dpu_dsc_cfg *cfg,
+struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(struct drm_device *dev,
+				       const struct dpu_dsc_cfg *cfg,
 				       void __iomem *addr);
 
 /**
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c
index 24fe1d98eb86..ba193b0376fe 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c
@@ -4,6 +4,8 @@
  * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved
  */
 
+#include <drm/drm_managed.h>
+
 #include <drm/display/drm_dsc_helper.h>
 
 #include "dpu_kms.h"
@@ -367,12 +369,13 @@ static void _setup_dcs_ops_1_2(struct dpu_hw_dsc_ops *ops,
 	ops->dsc_bind_pingpong_blk = dpu_hw_dsc_bind_pingpong_blk_1_2;
 }
 
-struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(const struct dpu_dsc_cfg *cfg,
+struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(struct drm_device *dev,
+				       const struct dpu_dsc_cfg *cfg,
 				       void __iomem *addr)
 {
 	struct dpu_hw_dsc *c;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
index 9419b2209af8..b1da88e2935f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
@@ -2,6 +2,8 @@
 /* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
  */
 
+#include <drm/drm_managed.h>
+
 #include "dpu_hwio.h"
 #include "dpu_hw_catalog.h"
 #include "dpu_hw_lm.h"
@@ -68,15 +70,16 @@ static void _setup_dspp_ops(struct dpu_hw_dspp *c,
 		c->ops.setup_pcc = dpu_setup_dspp_pcc;
 }
 
-struct dpu_hw_dspp *dpu_hw_dspp_init(const struct dpu_dspp_cfg *cfg,
-			void __iomem *addr)
+struct dpu_hw_dspp *dpu_hw_dspp_init(struct drm_device *dev,
+				     const struct dpu_dspp_cfg *cfg,
+				     void __iomem *addr)
 {
 	struct dpu_hw_dspp *c;
 
 	if (!addr)
 		return ERR_PTR(-EINVAL);
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -90,10 +93,3 @@ struct dpu_hw_dspp *dpu_hw_dspp_init(const struct dpu_dspp_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_dspp_destroy(struct dpu_hw_dspp *dspp)
-{
-	kfree(dspp);
-}
-
-
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h
index bea965681330..3b435690b6cc 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h
@@ -81,18 +81,14 @@ static inline struct dpu_hw_dspp *to_dpu_hw_dspp(struct dpu_hw_blk *hw)
 /**
  * dpu_hw_dspp_init() - Initializes the DSPP hw driver object.
  * should be called once before accessing every DSPP.
+ * @dev:  Corresponding device for devres management
  * @cfg:  DSPP catalog entry for which driver object is required
  * @addr: Mapped register io address of MDP
  * Return: pointer to structure or ERR_PTR
  */
-struct dpu_hw_dspp *dpu_hw_dspp_init(const struct dpu_dspp_cfg *cfg,
-	void __iomem *addr);
-
-/**
- * dpu_hw_dspp_destroy(): Destroys DSPP driver context
- * @dspp: Pointer to DSPP driver context
- */
-void dpu_hw_dspp_destroy(struct dpu_hw_dspp *dspp);
+struct dpu_hw_dspp *dpu_hw_dspp_init(struct drm_device *dev,
+				     const struct dpu_dspp_cfg *cfg,
+				     void __iomem *addr);
 
 #endif /*_DPU_HW_DSPP_H */
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
index 088807db2c83..946dd0135dff 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
@@ -6,6 +6,8 @@
 #include <linux/debugfs.h>
 #include <linux/slab.h>
 
+#include <drm/drm_managed.h>
+
 #include "dpu_core_irq.h"
 #include "dpu_kms.h"
 #include "dpu_hw_interrupts.h"
@@ -472,8 +474,9 @@ u32 dpu_core_irq_read(struct dpu_kms *dpu_kms,
 	return intr_status;
 }
 
-struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr,
-		const struct dpu_mdss_cfg *m)
+struct dpu_hw_intr *dpu_hw_intr_init(struct drm_device *dev,
+				     void __iomem *addr,
+				     const struct dpu_mdss_cfg *m)
 {
 	struct dpu_hw_intr *intr;
 	unsigned int i;
@@ -481,7 +484,7 @@ struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr,
 	if (!addr || !m)
 		return ERR_PTR(-EINVAL);
 
-	intr = kzalloc(sizeof(*intr), GFP_KERNEL);
+	intr = drmm_kzalloc(dev, sizeof(*intr), GFP_KERNEL);
 	if (!intr)
 		return ERR_PTR(-ENOMEM);
 
@@ -512,11 +515,6 @@ struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr,
 	return intr;
 }
 
-void dpu_hw_intr_destroy(struct dpu_hw_intr *intr)
-{
-	kfree(intr);
-}
-
 int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms,
 				   unsigned int irq_idx,
 				   void (*irq_cb)(void *arg),
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h
index 53a21ebc57e8..564b750a28fe 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h
@@ -70,15 +70,12 @@ struct dpu_hw_intr {
 
 /**
  * dpu_hw_intr_init(): Initializes the interrupts hw object
+ * @dev:  Corresponding device for devres management
  * @addr: mapped register io address of MDP
  * @m:    pointer to MDSS catalog data
  */
-struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr,
-		const struct dpu_mdss_cfg *m);
+struct dpu_hw_intr *dpu_hw_intr_init(struct drm_device *dev,
+				     void __iomem *addr,
+				     const struct dpu_mdss_cfg *m);
 
-/**
- * dpu_hw_intr_destroy(): Cleanup interrutps hw object
- * @intr: pointer to interrupts hw object
- */
-void dpu_hw_intr_destroy(struct dpu_hw_intr *intr);
 #endif
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
index e8b8908d3e12..6bba531d6dc4 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
  */
 
@@ -12,6 +12,8 @@
 
 #include <linux/iopoll.h>
 
+#include <drm/drm_managed.h>
+
 #define INTF_TIMING_ENGINE_EN           0x000
 #define INTF_CONFIG                     0x004
 #define INTF_HSYNC_CTL                  0x008
@@ -318,9 +320,9 @@ static u32 dpu_hw_intf_get_line_count(struct dpu_hw_intf *intf)
 	return DPU_REG_READ(c, INTF_LINE_COUNT);
 }
 
-static void dpu_hw_intf_setup_misr(struct dpu_hw_intf *intf, bool enable, u32 frame_count)
+static void dpu_hw_intf_setup_misr(struct dpu_hw_intf *intf)
 {
-	dpu_hw_setup_misr(&intf->hw, INTF_MISR_CTRL, enable, frame_count);
+	dpu_hw_setup_misr(&intf->hw, INTF_MISR_CTRL, 0x1);
 }
 
 static int dpu_hw_intf_collect_misr(struct dpu_hw_intf *intf, u32 *misr_value)
@@ -527,8 +529,10 @@ static void dpu_hw_intf_program_intf_cmd_cfg(struct dpu_hw_intf *ctx,
 	DPU_REG_WRITE(&ctx->hw, INTF_CONFIG2, intf_cfg2);
 }
 
-struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg,
-		void __iomem *addr, const struct dpu_mdss_version *mdss_rev)
+struct dpu_hw_intf *dpu_hw_intf_init(struct drm_device *dev,
+				     const struct dpu_intf_cfg *cfg,
+				     void __iomem *addr,
+				     const struct dpu_mdss_version *mdss_rev)
 {
 	struct dpu_hw_intf *c;
 
@@ -537,7 +541,7 @@ struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg,
 		return NULL;
 	}
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -581,9 +585,3 @@ struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_intf_destroy(struct dpu_hw_intf *intf)
-{
-	kfree(intf);
-}
-
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
index c539025c418b..0bd57a32144a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
  */
 
@@ -95,7 +95,7 @@ struct dpu_hw_intf_ops {
 
 	void (*bind_pingpong_blk)(struct dpu_hw_intf *intf,
 			const enum dpu_pingpong pp);
-	void (*setup_misr)(struct dpu_hw_intf *intf, bool enable, u32 frame_count);
+	void (*setup_misr)(struct dpu_hw_intf *intf);
 	int (*collect_misr)(struct dpu_hw_intf *intf, u32 *misr_value);
 
 	// Tearcheck on INTF since DPU 5.0.0
@@ -131,17 +131,14 @@ struct dpu_hw_intf {
 /**
  * dpu_hw_intf_init() - Initializes the INTF driver for the passed
  * interface catalog entry.
+ * @dev:  Corresponding device for devres management
  * @cfg:  interface catalog entry for which driver object is required
  * @addr: mapped register io address of MDP
  * @mdss_rev: dpu core's major and minor versions
  */
-struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg,
-		void __iomem *addr, const struct dpu_mdss_version *mdss_rev);
-
-/**
- * dpu_hw_intf_destroy(): Destroys INTF driver context
- * @intf:   Pointer to INTF driver context
- */
-void dpu_hw_intf_destroy(struct dpu_hw_intf *intf);
+struct dpu_hw_intf *dpu_hw_intf_init(struct drm_device *dev,
+				     const struct dpu_intf_cfg *cfg,
+				     void __iomem *addr,
+				     const struct dpu_mdss_version *mdss_rev);
 
 #endif /*_DPU_HW_INTF_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
index d1c3bd8379ea..1d3ccf3228c6 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved.
  */
 
+#include <drm/drm_managed.h>
+
 #include "dpu_kms.h"
 #include "dpu_hw_catalog.h"
 #include "dpu_hwio.h"
@@ -81,9 +83,9 @@ static void dpu_hw_lm_setup_border_color(struct dpu_hw_mixer *ctx,
 	}
 }
 
-static void dpu_hw_lm_setup_misr(struct dpu_hw_mixer *ctx, bool enable, u32 frame_count)
+static void dpu_hw_lm_setup_misr(struct dpu_hw_mixer *ctx)
 {
-	dpu_hw_setup_misr(&ctx->hw, LM_MISR_CTRL, enable, frame_count);
+	dpu_hw_setup_misr(&ctx->hw, LM_MISR_CTRL, 0x0);
 }
 
 static int dpu_hw_lm_collect_misr(struct dpu_hw_mixer *ctx, u32 *misr_value)
@@ -156,8 +158,9 @@ static void _setup_mixer_ops(struct dpu_hw_lm_ops *ops,
 	ops->collect_misr = dpu_hw_lm_collect_misr;
 }
 
-struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg,
-		void __iomem *addr)
+struct dpu_hw_mixer *dpu_hw_lm_init(struct drm_device *dev,
+				    const struct dpu_lm_cfg *cfg,
+				    void __iomem *addr)
 {
 	struct dpu_hw_mixer *c;
 
@@ -166,7 +169,7 @@ struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg,
 		return NULL;
 	}
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -180,8 +183,3 @@ struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_lm_destroy(struct dpu_hw_mixer *lm)
-{
-	kfree(lm);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h
index 36992d046a53..0a3381755249 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved.
  */
 
@@ -57,7 +58,7 @@ struct dpu_hw_lm_ops {
 	/**
 	 * setup_misr: Enable/disable MISR
 	 */
-	void (*setup_misr)(struct dpu_hw_mixer *ctx, bool enable, u32 frame_count);
+	void (*setup_misr)(struct dpu_hw_mixer *ctx);
 
 	/**
 	 * collect_misr: Read MISR signature
@@ -95,16 +96,12 @@ static inline struct dpu_hw_mixer *to_dpu_hw_mixer(struct dpu_hw_blk *hw)
 /**
  * dpu_hw_lm_init() - Initializes the mixer hw driver object.
  * should be called once before accessing every mixer.
+ * @dev:  Corresponding device for devres management
  * @cfg:  mixer catalog entry for which driver object is required
  * @addr: mapped register io address of MDP
  */
-struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg,
-		void __iomem *addr);
-
-/**
- * dpu_hw_lm_destroy(): Destroys layer mixer driver context
- * @lm:   Pointer to LM driver context
- */
-void dpu_hw_lm_destroy(struct dpu_hw_mixer *lm);
+struct dpu_hw_mixer *dpu_hw_lm_init(struct drm_device *dev,
+				    const struct dpu_lm_cfg *cfg,
+				    void __iomem *addr);
 
 #endif /*_DPU_HW_LM_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
index d85157acfbf8..5df545904057 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h
@@ -98,6 +98,7 @@ enum dpu_hw_blk_type {
 	DPU_HW_BLK_DSPP,
 	DPU_HW_BLK_MERGE_3D,
 	DPU_HW_BLK_DSC,
+	DPU_HW_BLK_CDM,
 	DPU_HW_BLK_MAX,
 };
 
@@ -185,6 +186,11 @@ enum dpu_dsc {
 	DSC_MAX
 };
 
+enum dpu_cdm {
+	CDM_0 = 1,
+	CDM_MAX
+};
+
 enum dpu_pingpong {
 	PINGPONG_NONE,
 	PINGPONG_0,
@@ -195,6 +201,8 @@ enum dpu_pingpong {
 	PINGPONG_5,
 	PINGPONG_6,
 	PINGPONG_7,
+	PINGPONG_8,
+	PINGPONG_9,
 	PINGPONG_S0,
 	PINGPONG_MAX
 };
@@ -204,6 +212,7 @@ enum dpu_merge_3d {
 	MERGE_3D_1,
 	MERGE_3D_2,
 	MERGE_3D_3,
+	MERGE_3D_4,
 	MERGE_3D_MAX
 };
 
@@ -458,6 +467,7 @@ struct dpu_mdss_color {
 #define DPU_DBG_MASK_ROT      (1 << 9)
 #define DPU_DBG_MASK_DSPP     (1 << 10)
 #define DPU_DBG_MASK_DSC      (1 << 11)
+#define DPU_DBG_MASK_CDM      (1 << 12)
 
 /**
  * struct dpu_hw_tear_check - Struct contains parameters to configure
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c
index 90e0e05eff8d..ddfa40a959cb 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c
@@ -4,6 +4,8 @@
 
 #include <linux/iopoll.h>
 
+#include <drm/drm_managed.h>
+
 #include "dpu_hw_mdss.h"
 #include "dpu_hwio.h"
 #include "dpu_hw_catalog.h"
@@ -37,12 +39,13 @@ static void _setup_merge_3d_ops(struct dpu_hw_merge_3d *c,
 	c->ops.setup_3d_mode = dpu_hw_merge_3d_setup_3d_mode;
 };
 
-struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(const struct dpu_merge_3d_cfg *cfg,
-		void __iomem *addr)
+struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(struct drm_device *dev,
+					     const struct dpu_merge_3d_cfg *cfg,
+					     void __iomem *addr)
 {
 	struct dpu_hw_merge_3d *c;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -55,8 +58,3 @@ struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(const struct dpu_merge_3d_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_merge_3d_destroy(struct dpu_hw_merge_3d *hw)
-{
-	kfree(hw);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h
index 19cec5e88722..c192f02ec1ab 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h
@@ -48,18 +48,13 @@ static inline struct dpu_hw_merge_3d *to_dpu_hw_merge_3d(struct dpu_hw_blk *hw)
 /**
  * dpu_hw_merge_3d_init() - Initializes the merge_3d driver for the passed
  * merge3d catalog entry.
+ * @dev:  Corresponding device for devres management
  * @cfg:  Pingpong catalog entry for which driver object is required
  * @addr: Mapped register io address of MDP
  * Return: Error code or allocated dpu_hw_merge_3d context
  */
-struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(const struct dpu_merge_3d_cfg *cfg,
-		void __iomem *addr);
-
-/**
- * dpu_hw_merge_3d_destroy - destroys merge_3d driver context
- *	should be called to free the context
- * @pp:   Pointer to PP driver context returned by dpu_hw_merge_3d_init
- */
-void dpu_hw_merge_3d_destroy(struct dpu_hw_merge_3d *pp);
+struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(struct drm_device *dev,
+					     const struct dpu_merge_3d_cfg *cfg,
+					     void __iomem *addr);
 
 #endif /*_DPU_HW_MERGE3D_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c
index 057cac7f5d93..2db4c6fba37a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c
@@ -4,6 +4,8 @@
 
 #include <linux/iopoll.h>
 
+#include <drm/drm_managed.h>
+
 #include "dpu_hw_mdss.h"
 #include "dpu_hwio.h"
 #include "dpu_hw_catalog.h"
@@ -281,12 +283,14 @@ static int dpu_hw_pp_setup_dsc(struct dpu_hw_pingpong *pp)
 	return 0;
 }
 
-struct dpu_hw_pingpong *dpu_hw_pingpong_init(const struct dpu_pingpong_cfg *cfg,
-		void __iomem *addr, const struct dpu_mdss_version *mdss_rev)
+struct dpu_hw_pingpong *dpu_hw_pingpong_init(struct drm_device *dev,
+					     const struct dpu_pingpong_cfg *cfg,
+					     void __iomem *addr,
+					     const struct dpu_mdss_version *mdss_rev)
 {
 	struct dpu_hw_pingpong *c;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -317,8 +321,3 @@ struct dpu_hw_pingpong *dpu_hw_pingpong_init(const struct dpu_pingpong_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_pingpong_destroy(struct dpu_hw_pingpong *pp)
-{
-	kfree(pp);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h
index 0d541ca5b056..a48b69fd79a3 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h
@@ -121,19 +121,15 @@ static inline struct dpu_hw_pingpong *to_dpu_hw_pingpong(struct dpu_hw_blk *hw)
 /**
  * dpu_hw_pingpong_init() - initializes the pingpong driver for the passed
  * pingpong catalog entry.
+ * @dev:  Corresponding device for devres management
  * @cfg:  Pingpong catalog entry for which driver object is required
  * @addr: Mapped register io address of MDP
  * @mdss_rev: dpu core's major and minor versions
  * Return: Error code or allocated dpu_hw_pingpong context
  */
-struct dpu_hw_pingpong *dpu_hw_pingpong_init(const struct dpu_pingpong_cfg *cfg,
-		void __iomem *addr, const struct dpu_mdss_version *mdss_rev);
-
-/**
- * dpu_hw_pingpong_destroy - destroys pingpong driver context
- *	should be called to free the context
- * @pp:   Pointer to PP driver context returned by dpu_hw_pingpong_init
- */
-void dpu_hw_pingpong_destroy(struct dpu_hw_pingpong *pp);
+struct dpu_hw_pingpong *dpu_hw_pingpong_init(struct drm_device *dev,
+					     const struct dpu_pingpong_cfg *cfg,
+					     void __iomem *addr,
+					     const struct dpu_mdss_version *mdss_rev);
 
 #endif /*_DPU_HW_PINGPONG_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
index 8e3c65989c49..0bf8a83e8df3 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
@@ -11,6 +11,7 @@
 #include "msm_mdss.h"
 
 #include <drm/drm_file.h>
+#include <drm/drm_managed.h>
 
 #define DPU_FETCH_CONFIG_RESET_VALUE   0x00000087
 
@@ -395,15 +396,6 @@ static void _dpu_hw_sspp_setup_scaler3(struct dpu_hw_sspp *ctx,
 			format);
 }
 
-static u32 _dpu_hw_sspp_get_scaler3_ver(struct dpu_hw_sspp *ctx)
-{
-	if (!ctx)
-		return 0;
-
-	return dpu_hw_get_scaler3_ver(&ctx->hw,
-				      ctx->cap->sblk->scaler_blk.base);
-}
-
 /*
  * dpu_hw_sspp_setup_rects()
  */
@@ -614,12 +606,8 @@ static void _setup_layer_ops(struct dpu_hw_sspp *c,
 		test_bit(DPU_SSPP_SMART_DMA_V2, &c->cap->features))
 		c->ops.setup_multirect = dpu_hw_sspp_setup_multirect;
 
-	if (test_bit(DPU_SSPP_SCALER_QSEED3, &features) ||
-			test_bit(DPU_SSPP_SCALER_QSEED3LITE, &features) ||
-			test_bit(DPU_SSPP_SCALER_QSEED4, &features)) {
+	if (test_bit(DPU_SSPP_SCALER_QSEED3_COMPATIBLE, &features))
 		c->ops.setup_scaler = _dpu_hw_sspp_setup_scaler3;
-		c->ops.get_scaler_ver = _dpu_hw_sspp_get_scaler3_ver;
-	}
 
 	if (test_bit(DPU_SSPP_CDP, &features))
 		c->ops.setup_cdp = dpu_hw_sspp_setup_cdp;
@@ -654,10 +642,7 @@ int _dpu_hw_sspp_init_debugfs(struct dpu_hw_sspp *hw_pipe, struct dpu_kms *kms,
 			cfg->len,
 			kms);
 
-	if (cfg->features & BIT(DPU_SSPP_SCALER_QSEED3) ||
-			cfg->features & BIT(DPU_SSPP_SCALER_QSEED3LITE) ||
-			cfg->features & BIT(DPU_SSPP_SCALER_QSEED2) ||
-			cfg->features & BIT(DPU_SSPP_SCALER_QSEED4))
+	if (sblk->scaler_blk.len)
 		dpu_debugfs_create_regset32("scaler_blk", 0400,
 				debugfs_root,
 				sblk->scaler_blk.base + cfg->base,
@@ -685,16 +670,18 @@ int _dpu_hw_sspp_init_debugfs(struct dpu_hw_sspp *hw_pipe, struct dpu_kms *kms,
 }
 #endif
 
-struct dpu_hw_sspp *dpu_hw_sspp_init(const struct dpu_sspp_cfg *cfg,
-		void __iomem *addr, const struct msm_mdss_data *mdss_data,
-		const struct dpu_mdss_version *mdss_rev)
+struct dpu_hw_sspp *dpu_hw_sspp_init(struct drm_device *dev,
+				     const struct dpu_sspp_cfg *cfg,
+				     void __iomem *addr,
+				     const struct msm_mdss_data *mdss_data,
+				     const struct dpu_mdss_version *mdss_rev)
 {
 	struct dpu_hw_sspp *hw_pipe;
 
 	if (!addr)
 		return ERR_PTR(-EINVAL);
 
-	hw_pipe = kzalloc(sizeof(*hw_pipe), GFP_KERNEL);
+	hw_pipe = drmm_kzalloc(dev, sizeof(*hw_pipe), GFP_KERNEL);
 	if (!hw_pipe)
 		return ERR_PTR(-ENOMEM);
 
@@ -709,9 +696,3 @@ struct dpu_hw_sspp *dpu_hw_sspp_init(const struct dpu_sspp_cfg *cfg,
 
 	return hw_pipe;
 }
-
-void dpu_hw_sspp_destroy(struct dpu_hw_sspp *ctx)
-{
-	kfree(ctx);
-}
-
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
index f93969fddb22..b7dc52312c39 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
@@ -22,21 +22,6 @@ struct dpu_hw_sspp;
 #define DPU_SSPP_SOLID_FILL		BIT(4)
 
 /**
- * Define all scaler feature bits in catalog
- */
-#define DPU_SSPP_SCALER (BIT(DPU_SSPP_SCALER_RGB) | \
-			 BIT(DPU_SSPP_SCALER_QSEED2) | \
-			 BIT(DPU_SSPP_SCALER_QSEED3) | \
-			 BIT(DPU_SSPP_SCALER_QSEED3LITE) | \
-			 BIT(DPU_SSPP_SCALER_QSEED4))
-
-/*
- * Define all CSC feature bits in catalog
- */
-#define DPU_SSPP_CSC_ANY (BIT(DPU_SSPP_CSC) | \
-			  BIT(DPU_SSPP_CSC_10BIT))
-
-/**
  * Component indices
  */
 enum {
@@ -297,12 +282,6 @@ struct dpu_hw_sspp_ops {
 		const struct dpu_format *format);
 
 	/**
-	 * get_scaler_ver - get scaler h/w version
-	 * @ctx: Pointer to pipe context
-	 */
-	u32 (*get_scaler_ver)(struct dpu_hw_sspp *ctx);
-
-	/**
 	 * setup_cdp - setup client driven prefetch
 	 * @pipe: Pointer to software pipe context
 	 * @fmt: format used by the sw pipe
@@ -339,21 +318,17 @@ struct dpu_kms;
 /**
  * dpu_hw_sspp_init() - Initializes the sspp hw driver object.
  * Should be called once before accessing every pipe.
+ * @dev:  Corresponding device for devres management
  * @cfg:  Pipe catalog entry for which driver object is required
  * @addr: Mapped register io address of MDP
  * @mdss_data: UBWC / MDSS configuration data
  * @mdss_rev: dpu core's major and minor versions
  */
-struct dpu_hw_sspp *dpu_hw_sspp_init(const struct dpu_sspp_cfg *cfg,
-		void __iomem *addr, const struct msm_mdss_data *mdss_data,
-		const struct dpu_mdss_version *mdss_rev);
-
-/**
- * dpu_hw_sspp_destroy(): Destroys SSPP driver context
- * should be called during Hw pipe cleanup.
- * @ctx:  Pointer to SSPP driver context returned by dpu_hw_sspp_init
- */
-void dpu_hw_sspp_destroy(struct dpu_hw_sspp *ctx);
+struct dpu_hw_sspp *dpu_hw_sspp_init(struct drm_device *dev,
+				     const struct dpu_sspp_cfg *cfg,
+				     void __iomem *addr,
+				     const struct msm_mdss_data *mdss_data,
+				     const struct dpu_mdss_version *mdss_rev);
 
 int _dpu_hw_sspp_init_debugfs(struct dpu_hw_sspp *hw_pipe, struct dpu_kms *kms,
 			      struct dentry *entry);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c
index 24e734768a72..05e48cf4ec1d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c
@@ -2,6 +2,8 @@
 /* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
  */
 
+#include <drm/drm_managed.h>
+
 #include "dpu_hwio.h"
 #include "dpu_hw_catalog.h"
 #include "dpu_hw_top.h"
@@ -247,16 +249,17 @@ static void _setup_mdp_ops(struct dpu_hw_mdp_ops *ops,
 		ops->intf_audio_select = dpu_hw_intf_audio_select;
 }
 
-struct dpu_hw_mdp *dpu_hw_mdptop_init(const struct dpu_mdp_cfg *cfg,
-		void __iomem *addr,
-		const struct dpu_mdss_cfg *m)
+struct dpu_hw_mdp *dpu_hw_mdptop_init(struct drm_device *dev,
+				      const struct dpu_mdp_cfg *cfg,
+				      void __iomem *addr,
+				      const struct dpu_mdss_cfg *m)
 {
 	struct dpu_hw_mdp *mdp;
 
 	if (!addr)
 		return ERR_PTR(-EINVAL);
 
-	mdp = kzalloc(sizeof(*mdp), GFP_KERNEL);
+	mdp = drmm_kzalloc(dev, sizeof(*mdp), GFP_KERNEL);
 	if (!mdp)
 		return ERR_PTR(-ENOMEM);
 
@@ -271,9 +274,3 @@ struct dpu_hw_mdp *dpu_hw_mdptop_init(const struct dpu_mdp_cfg *cfg,
 
 	return mdp;
 }
-
-void dpu_hw_mdp_destroy(struct dpu_hw_mdp *mdp)
-{
-	kfree(mdp);
-}
-
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h
index 8b1463d2b2f0..6f3dc98087df 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h
@@ -145,13 +145,15 @@ struct dpu_hw_mdp {
 
 /**
  * dpu_hw_mdptop_init - initializes the top driver for the passed config
+ * @dev:  Corresponding device for devres management
  * @cfg:  MDP TOP configuration from catalog
  * @addr: Mapped register io address of MDP
  * @m:    Pointer to mdss catalog data
  */
-struct dpu_hw_mdp *dpu_hw_mdptop_init(const struct dpu_mdp_cfg *cfg,
-		void __iomem *addr,
-		const struct dpu_mdss_cfg *m);
+struct dpu_hw_mdp *dpu_hw_mdptop_init(struct drm_device *dev,
+				      const struct dpu_mdp_cfg *cfg,
+				      void __iomem *addr,
+				      const struct dpu_mdss_cfg *m);
 
 void dpu_hw_mdp_destroy(struct dpu_hw_mdp *mdp);
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
index 18b16b2d2bf5..dd475827314e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
  */
 #define pr_fmt(fmt)	"[drm:%s:%d] " fmt, __func__, __LINE__
@@ -381,12 +381,6 @@ end:
 	DPU_REG_WRITE(c, QSEED3_OP_MODE + scaler_offset, op_mode);
 }
 
-u32 dpu_hw_get_scaler3_ver(struct dpu_hw_blk_reg_map *c,
-			u32 scaler_offset)
-{
-	return DPU_REG_READ(c, QSEED3_HW_VERSION + scaler_offset);
-}
-
 void dpu_hw_csc_setup(struct dpu_hw_blk_reg_map *c,
 		u32 csc_reg_off,
 		const struct dpu_csc_cfg *data, bool csc10)
@@ -481,9 +475,11 @@ void _dpu_hw_setup_qos_lut(struct dpu_hw_blk_reg_map *c, u32 offset,
 		      cfg->danger_safe_en ? QOS_QOS_CTRL_DANGER_SAFE_EN : 0);
 }
 
+/*
+ * note: Aside from encoders, input_sel should be set to 0x0 by default
+ */
 void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c,
-		u32 misr_ctrl_offset,
-		bool enable, u32 frame_count)
+		u32 misr_ctrl_offset, u8 input_sel)
 {
 	u32 config = 0;
 
@@ -492,15 +488,9 @@ void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c,
 	/* Clear old MISR value (in case it's read before a new value is calculated)*/
 	wmb();
 
-	if (enable) {
-		config = (frame_count & MISR_FRAME_COUNT_MASK) |
-			MISR_CTRL_ENABLE | MISR_CTRL_FREE_RUN_MASK;
-
-		DPU_REG_WRITE(c, misr_ctrl_offset, config);
-	} else {
-		DPU_REG_WRITE(c, misr_ctrl_offset, 0);
-	}
-
+	config = MISR_FRAME_COUNT | MISR_CTRL_ENABLE | MISR_CTRL_FREE_RUN_MASK |
+		((input_sel & 0xF) << 24);
+	DPU_REG_WRITE(c, misr_ctrl_offset, config);
 }
 
 int dpu_hw_collect_misr(struct dpu_hw_blk_reg_map *c,
@@ -567,3 +557,47 @@ bool dpu_hw_clk_force_ctrl(struct dpu_hw_blk_reg_map *c,
 
 	return clk_forced_on;
 }
+
+#define TO_S15D16(_x_)((_x_) << 7)
+
+const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
+	{
+		/* S15.16 format */
+		0x00012A00, 0x00000000, 0x00019880,
+		0x00012A00, 0xFFFF9B80, 0xFFFF3000,
+		0x00012A00, 0x00020480, 0x00000000,
+	},
+	/* signed bias */
+	{ 0xfff0, 0xff80, 0xff80,},
+	{ 0x0, 0x0, 0x0,},
+	/* unsigned clamp */
+	{ 0x10, 0xeb, 0x10, 0xf0, 0x10, 0xf0,},
+	{ 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,},
+};
+
+const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
+	{
+		/* S15.16 format */
+		0x00012A00, 0x00000000, 0x00019880,
+		0x00012A00, 0xFFFF9B80, 0xFFFF3000,
+		0x00012A00, 0x00020480, 0x00000000,
+	},
+	/* signed bias */
+	{ 0xffc0, 0xfe00, 0xfe00,},
+	{ 0x0, 0x0, 0x0,},
+	/* unsigned clamp */
+	{ 0x40, 0x3ac, 0x40, 0x3c0, 0x40, 0x3c0,},
+	{ 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
+};
+
+const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = {
+	{
+		TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032),
+		TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1),
+		TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc)
+	},
+	{ 0x00, 0x00, 0x00 },
+	{ 0x0040, 0x0200, 0x0200 },
+	{ 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff },
+	{ 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 },
+};
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
index 4bea139081bc..64ded69fa903 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved.
  */
 
@@ -13,12 +13,18 @@
 #include "dpu_hw_catalog.h"
 
 #define REG_MASK(n)                     ((BIT(n)) - 1)
-#define MISR_FRAME_COUNT_MASK           0xFF
+#define MISR_FRAME_COUNT                0x1
 #define MISR_CTRL_ENABLE                BIT(8)
 #define MISR_CTRL_STATUS                BIT(9)
 #define MISR_CTRL_STATUS_CLEAR          BIT(10)
 #define MISR_CTRL_FREE_RUN_MASK         BIT(31)
 
+#define TO_S15D16(_x_)((_x_) << 7)
+
+extern const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L;
+extern const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L;
+extern const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l;
+
 /*
  * This is the common struct maintained by each sub block
  * for mapping the register offsets in this block to the
@@ -340,9 +346,6 @@ void dpu_hw_setup_scaler3(struct dpu_hw_blk_reg_map *c,
 		u32 scaler_offset, u32 scaler_version,
 		const struct dpu_format *format);
 
-u32 dpu_hw_get_scaler3_ver(struct dpu_hw_blk_reg_map *c,
-		u32 scaler_offset);
-
 void dpu_hw_csc_setup(struct dpu_hw_blk_reg_map  *c,
 		u32 csc_reg_off,
 		const struct dpu_csc_cfg *data, bool csc10);
@@ -358,9 +361,7 @@ void _dpu_hw_setup_qos_lut(struct dpu_hw_blk_reg_map *c, u32 offset,
 			   const struct dpu_hw_qos_cfg *cfg);
 
 void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c,
-		u32 misr_ctrl_offset,
-		bool enable,
-		u32 frame_count);
+		u32 misr_ctrl_offset, u8 input_sel);
 
 int dpu_hw_collect_misr(struct dpu_hw_blk_reg_map *c,
 		u32 misr_ctrl_offset,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c
index a5121a50b2bb..98e34afde2d2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c
@@ -2,6 +2,8 @@
 /* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
  */
 
+#include <drm/drm_managed.h>
+
 #include "dpu_hwio.h"
 #include "dpu_hw_catalog.h"
 #include "dpu_hw_vbif.h"
@@ -211,12 +213,13 @@ static void _setup_vbif_ops(struct dpu_hw_vbif_ops *ops,
 	ops->set_write_gather_en = dpu_hw_set_write_gather_en;
 }
 
-struct dpu_hw_vbif *dpu_hw_vbif_init(const struct dpu_vbif_cfg *cfg,
-		void __iomem *addr)
+struct dpu_hw_vbif *dpu_hw_vbif_init(struct drm_device *dev,
+				     const struct dpu_vbif_cfg *cfg,
+				     void __iomem *addr)
 {
 	struct dpu_hw_vbif *c;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -234,8 +237,3 @@ struct dpu_hw_vbif *dpu_hw_vbif_init(const struct dpu_vbif_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_vbif_destroy(struct dpu_hw_vbif *vbif)
-{
-	kfree(vbif);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h
index 7e10d2a172b4..e2b4307500e4 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h
@@ -108,12 +108,12 @@ struct dpu_hw_vbif {
 /**
  * dpu_hw_vbif_init() - Initializes the VBIF driver for the passed
  * VBIF catalog entry.
+ * @dev:  Corresponding device for devres management
  * @cfg:  VBIF catalog entry for which driver object is required
  * @addr: Mapped register io address of MDSS
  */
-struct dpu_hw_vbif *dpu_hw_vbif_init(const struct dpu_vbif_cfg *cfg,
-		void __iomem *addr);
-
-void dpu_hw_vbif_destroy(struct dpu_hw_vbif *vbif);
+struct dpu_hw_vbif *dpu_hw_vbif_init(struct drm_device *dev,
+				     const struct dpu_vbif_cfg *cfg,
+				     void __iomem *addr);
 
 #endif /*_DPU_HW_VBIF_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
index 9668fb97c047..e75995f7fcea 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
@@ -3,6 +3,8 @@
   * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved
   */
 
+#include <drm/drm_managed.h>
+
 #include "dpu_hw_mdss.h"
 #include "dpu_hwio.h"
 #include "dpu_hw_catalog.h"
@@ -87,6 +89,9 @@ static void dpu_hw_wb_setup_format(struct dpu_hw_wb *ctx,
 			dst_format |= BIT(14); /* DST_ALPHA_X */
 	}
 
+	if (DPU_FORMAT_IS_YUV(fmt))
+		dst_format |= BIT(15);
+
 	pattern = (fmt->element[3] << 24) |
 		(fmt->element[2] << 16) |
 		(fmt->element[1] << 8)  |
@@ -208,15 +213,17 @@ static void _setup_wb_ops(struct dpu_hw_wb_ops *ops,
 		ops->setup_clk_force_ctrl = dpu_hw_wb_setup_clk_force_ctrl;
 }
 
-struct dpu_hw_wb *dpu_hw_wb_init(const struct dpu_wb_cfg *cfg,
-		void __iomem *addr, const struct dpu_mdss_version *mdss_rev)
+struct dpu_hw_wb *dpu_hw_wb_init(struct drm_device *dev,
+				 const struct dpu_wb_cfg *cfg,
+				 void __iomem *addr,
+				 const struct dpu_mdss_version *mdss_rev)
 {
 	struct dpu_hw_wb *c;
 
 	if (!addr)
 		return ERR_PTR(-EINVAL);
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
@@ -230,8 +237,3 @@ struct dpu_hw_wb *dpu_hw_wb_init(const struct dpu_wb_cfg *cfg,
 
 	return c;
 }
-
-void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb)
-{
-	kfree(hw_wb);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
index 88792f450a92..e671796ea379 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
@@ -76,18 +76,15 @@ struct dpu_hw_wb {
 
 /**
  * dpu_hw_wb_init() - Initializes the writeback hw driver object.
+ * @dev:  Corresponding device for devres management
  * @cfg:  wb_path catalog entry for which driver object is required
  * @addr: mapped register io address of MDP
  * @mdss_rev: dpu core's major and minor versions
  * Return: Error code or allocated dpu_hw_wb context
  */
-struct dpu_hw_wb *dpu_hw_wb_init(const struct dpu_wb_cfg *cfg,
-		void __iomem *addr, const struct dpu_mdss_version *mdss_rev);
-
-/**
- * dpu_hw_wb_destroy(): Destroy writeback hw driver object.
- * @hw_wb:  Pointer to writeback hw driver object
- */
-void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb);
+struct dpu_hw_wb *dpu_hw_wb_init(struct drm_device *dev,
+				 const struct dpu_wb_cfg *cfg,
+				 void __iomem *addr,
+				 const struct dpu_mdss_version *mdss_rev);
 
 #endif /*_DPU_HW_WB_H */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index fe7267b3bff5..723cc1d82143 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -274,9 +274,6 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor)
 	struct dpu_kms *dpu_kms = to_dpu_kms(kms);
 	void *p = dpu_hw_util_get_log_mask_ptr();
 	struct dentry *entry;
-	struct drm_device *dev;
-	struct msm_drm_private *priv;
-	int i;
 
 	if (!p)
 		return -EINVAL;
@@ -285,9 +282,6 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor)
 	if (minor->type != DRM_MINOR_PRIMARY)
 		return 0;
 
-	dev = dpu_kms->dev;
-	priv = dev->dev_private;
-
 	entry = debugfs_create_dir("debug", minor->debugfs_root);
 
 	debugfs_create_x32(DPU_DEBUGFS_HWMASKNAME, 0600, entry, p);
@@ -297,11 +291,6 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor)
 	dpu_debugfs_core_irq_init(dpu_kms, entry);
 	dpu_debugfs_sspp_init(dpu_kms, entry);
 
-	for (i = 0; i < ARRAY_SIZE(priv->dp); i++) {
-		if (priv->dp[i])
-			msm_dp_debugfs_init(priv->dp[i], minor);
-	}
-
 	return dpu_core_perf_debugfs_init(dpu_kms, entry);
 }
 #endif
@@ -597,7 +586,6 @@ static int _dpu_kms_initialize_displayport(struct drm_device *dev,
 		rc = msm_dp_modeset_init(priv->dp[i], dev, encoder);
 		if (rc) {
 			DPU_ERROR("modeset_init failed for DP, rc = %d\n", rc);
-			drm_encoder_cleanup(encoder);
 			return rc;
 		}
 	}
@@ -630,7 +618,6 @@ static int _dpu_kms_initialize_hdmi(struct drm_device *dev,
 	rc = msm_hdmi_modeset_init(priv->hdmi, dev, encoder);
 	if (rc) {
 		DPU_ERROR("modeset_init failed for DP, rc = %d\n", rc);
-		drm_encoder_cleanup(encoder);
 		return rc;
 	}
 
@@ -662,7 +649,6 @@ static int _dpu_kms_initialize_writeback(struct drm_device *dev,
 			n_formats);
 	if (rc) {
 		DPU_ERROR("dpu_writeback_init, rc = %d\n", rc);
-		drm_encoder_cleanup(encoder);
 		return rc;
 	}
 
@@ -806,30 +792,17 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms)
 {
 	int i;
 
-	if (dpu_kms->hw_intr)
-		dpu_hw_intr_destroy(dpu_kms->hw_intr);
 	dpu_kms->hw_intr = NULL;
 
 	/* safe to call these more than once during shutdown */
 	_dpu_kms_mmu_destroy(dpu_kms);
 
-	if (dpu_kms->catalog) {
-		for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) {
-			if (dpu_kms->hw_vbif[i]) {
-				dpu_hw_vbif_destroy(dpu_kms->hw_vbif[i]);
-				dpu_kms->hw_vbif[i] = NULL;
-			}
-		}
+	for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) {
+		dpu_kms->hw_vbif[i] = NULL;
 	}
 
-	if (dpu_kms->rm_init)
-		dpu_rm_destroy(&dpu_kms->rm);
-	dpu_kms->rm_init = false;
-
 	dpu_kms->catalog = NULL;
 
-	if (dpu_kms->hw_mdp)
-		dpu_hw_mdp_destroy(dpu_kms->hw_mdp);
 	dpu_kms->hw_mdp = NULL;
 }
 
@@ -856,7 +829,6 @@ static int dpu_irq_postinstall(struct msm_kms *kms)
 {
 	struct msm_drm_private *priv;
 	struct dpu_kms *dpu_kms = to_dpu_kms(kms);
-	int i;
 
 	if (!dpu_kms || !dpu_kms->dev)
 		return -EINVAL;
@@ -865,9 +837,6 @@ static int dpu_irq_postinstall(struct msm_kms *kms)
 	if (!priv)
 		return -EINVAL;
 
-	for (i = 0; i < ARRAY_SIZE(priv->dp); i++)
-		msm_dp_irq_postinstall(priv->dp[i]);
-
 	return 0;
 }
 
@@ -975,6 +944,10 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k
 		}
 	}
 
+	if (cat->cdm)
+		msm_disp_snapshot_add_block(disp_state, cat->cdm->len,
+					    dpu_kms->mmio + cat->cdm->base, cat->cdm->name);
+
 	pm_runtime_put_sync(&dpu_kms->pdev->dev);
 }
 
@@ -1078,7 +1051,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 	if (!dpu_kms->catalog) {
 		DPU_ERROR("device config not known!\n");
 		rc = -EINVAL;
-		goto power_error;
+		goto err_pm_put;
 	}
 
 	/*
@@ -1088,49 +1061,48 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 	rc = _dpu_kms_mmu_init(dpu_kms);
 	if (rc) {
 		DPU_ERROR("dpu_kms_mmu_init failed: %d\n", rc);
-		goto power_error;
+		goto err_pm_put;
 	}
 
 	dpu_kms->mdss = msm_mdss_get_mdss_data(dpu_kms->pdev->dev.parent);
 	if (IS_ERR(dpu_kms->mdss)) {
 		rc = PTR_ERR(dpu_kms->mdss);
 		DPU_ERROR("failed to get MDSS data: %d\n", rc);
-		goto power_error;
+		goto err_pm_put;
 	}
 
 	if (!dpu_kms->mdss) {
 		rc = -EINVAL;
 		DPU_ERROR("NULL MDSS data\n");
-		goto power_error;
+		goto err_pm_put;
 	}
 
-	rc = dpu_rm_init(&dpu_kms->rm, dpu_kms->catalog, dpu_kms->mdss, dpu_kms->mmio);
+	rc = dpu_rm_init(dev, &dpu_kms->rm, dpu_kms->catalog, dpu_kms->mdss, dpu_kms->mmio);
 	if (rc) {
 		DPU_ERROR("rm init failed: %d\n", rc);
-		goto power_error;
+		goto err_pm_put;
 	}
 
-	dpu_kms->rm_init = true;
-
-	dpu_kms->hw_mdp = dpu_hw_mdptop_init(dpu_kms->catalog->mdp,
+	dpu_kms->hw_mdp = dpu_hw_mdptop_init(dev,
+					     dpu_kms->catalog->mdp,
 					     dpu_kms->mmio,
 					     dpu_kms->catalog);
 	if (IS_ERR(dpu_kms->hw_mdp)) {
 		rc = PTR_ERR(dpu_kms->hw_mdp);
 		DPU_ERROR("failed to get hw_mdp: %d\n", rc);
 		dpu_kms->hw_mdp = NULL;
-		goto power_error;
+		goto err_pm_put;
 	}
 
 	for (i = 0; i < dpu_kms->catalog->vbif_count; i++) {
 		struct dpu_hw_vbif *hw;
 		const struct dpu_vbif_cfg *vbif = &dpu_kms->catalog->vbif[i];
 
-		hw = dpu_hw_vbif_init(vbif, dpu_kms->vbif[vbif->id]);
+		hw = dpu_hw_vbif_init(dev, vbif, dpu_kms->vbif[vbif->id]);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed to init vbif %d: %d\n", vbif->id, rc);
-			goto power_error;
+			goto err_pm_put;
 		}
 
 		dpu_kms->hw_vbif[vbif->id] = hw;
@@ -1146,15 +1118,15 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 	rc = dpu_core_perf_init(&dpu_kms->perf, dpu_kms->catalog->perf, max_core_clk_rate);
 	if (rc) {
 		DPU_ERROR("failed to init perf %d\n", rc);
-		goto perf_err;
+		goto err_pm_put;
 	}
 
-	dpu_kms->hw_intr = dpu_hw_intr_init(dpu_kms->mmio, dpu_kms->catalog);
-	if (IS_ERR_OR_NULL(dpu_kms->hw_intr)) {
+	dpu_kms->hw_intr = dpu_hw_intr_init(dev, dpu_kms->mmio, dpu_kms->catalog);
+	if (IS_ERR(dpu_kms->hw_intr)) {
 		rc = PTR_ERR(dpu_kms->hw_intr);
 		DPU_ERROR("hw_intr init failed: %d\n", rc);
 		dpu_kms->hw_intr = NULL;
-		goto hw_intr_init_err;
+		goto err_pm_put;
 	}
 
 	dev->mode_config.min_width = 0;
@@ -1179,7 +1151,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 	rc = _dpu_kms_drm_obj_init(dpu_kms);
 	if (rc) {
 		DPU_ERROR("modeset init failed: %d\n", rc);
-		goto drm_obj_init_err;
+		goto err_pm_put;
 	}
 
 	dpu_vbif_init_memtypes(dpu_kms);
@@ -1188,10 +1160,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 
 	return 0;
 
-drm_obj_init_err:
-hw_intr_init_err:
-perf_err:
-power_error:
+err_pm_put:
 	pm_runtime_put_sync(&dpu_kms->pdev->dev);
 error:
 	_dpu_kms_hw_destroy(dpu_kms);
@@ -1349,6 +1318,7 @@ static const struct dev_pm_ops dpu_pm_ops = {
 static const struct of_device_id dpu_dt_match[] = {
 	{ .compatible = "qcom,msm8998-dpu", .data = &dpu_msm8998_cfg, },
 	{ .compatible = "qcom,qcm2290-dpu", .data = &dpu_qcm2290_cfg, },
+	{ .compatible = "qcom,sdm670-dpu", .data = &dpu_sdm670_cfg, },
 	{ .compatible = "qcom,sdm845-dpu", .data = &dpu_sdm845_cfg, },
 	{ .compatible = "qcom,sc7180-dpu", .data = &dpu_sc7180_cfg, },
 	{ .compatible = "qcom,sc7280-dpu", .data = &dpu_sc7280_cfg, },
@@ -1363,6 +1333,7 @@ static const struct of_device_id dpu_dt_match[] = {
 	{ .compatible = "qcom,sm8350-dpu", .data = &dpu_sm8350_cfg, },
 	{ .compatible = "qcom,sm8450-dpu", .data = &dpu_sm8450_cfg, },
 	{ .compatible = "qcom,sm8550-dpu", .data = &dpu_sm8550_cfg, },
+	{ .compatible = "qcom,sm8650-dpu", .data = &dpu_sm8650_cfg, },
 	{}
 };
 MODULE_DEVICE_TABLE(of, dpu_dt_match);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
index b6f53ca6e962..d1207f4ec3ae 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
@@ -51,6 +51,7 @@
 	} while (0)
 
 #define DPU_ERROR(fmt, ...) pr_err("[dpu error]" fmt, ##__VA_ARGS__)
+#define DPU_ERROR_RATELIMITED(fmt, ...) pr_err_ratelimited("[dpu error]" fmt, ##__VA_ARGS__)
 
 /**
  * ktime_compare_safe - compare two ktime structures
@@ -88,7 +89,6 @@ struct dpu_kms {
 	struct drm_private_obj global_state;
 
 	struct dpu_rm rm;
-	bool rm_init;
 
 	struct dpu_hw_vbif *hw_vbif[VBIF_MAX];
 	struct dpu_hw_mdp *hw_mdp;
@@ -136,6 +136,7 @@ struct dpu_global_state {
 	uint32_t ctl_to_enc_id[CTL_MAX - CTL_0];
 	uint32_t dspp_to_enc_id[DSPP_MAX - DSPP_0];
 	uint32_t dsc_to_enc_id[DSC_MAX - DSC_0];
+	uint32_t cdm_to_enc_id;
 };
 
 struct dpu_global_state
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index 3eef5e025e12..ff975ad51145 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -21,6 +21,7 @@
 #include "dpu_kms.h"
 #include "dpu_formats.h"
 #include "dpu_hw_sspp.h"
+#include "dpu_hw_util.h"
 #include "dpu_trace.h"
 #include "dpu_crtc.h"
 #include "dpu_vbif.h"
@@ -78,8 +79,6 @@ static const uint32_t qcom_compressed_supported_formats[] = {
 struct dpu_plane {
 	struct drm_plane base;
 
-	struct mutex lock;
-
 	enum dpu_sspp pipe;
 
 	uint32_t color_fill;
@@ -470,8 +469,7 @@ static void _dpu_plane_setup_scaler3(struct dpu_hw_sspp *pipe_hw,
 			scale_cfg->src_height[i] /= chroma_subsmpl_v;
 		}
 
-		if (pipe_hw->cap->features &
-			BIT(DPU_SSPP_SCALER_QSEED4)) {
+		if (pipe_hw->cap->sblk->scaler_blk.version >= 0x3000) {
 			scale_cfg->preload_x[i] = DPU_QSEED4_DEFAULT_PRELOAD_H;
 			scale_cfg->preload_y[i] = DPU_QSEED4_DEFAULT_PRELOAD_V;
 		} else {
@@ -511,36 +509,6 @@ static void _dpu_plane_setup_pixel_ext(struct dpu_hw_scaler3_cfg *scale_cfg,
 	}
 }
 
-static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
-	{
-		/* S15.16 format */
-		0x00012A00, 0x00000000, 0x00019880,
-		0x00012A00, 0xFFFF9B80, 0xFFFF3000,
-		0x00012A00, 0x00020480, 0x00000000,
-	},
-	/* signed bias */
-	{ 0xfff0, 0xff80, 0xff80,},
-	{ 0x0, 0x0, 0x0,},
-	/* unsigned clamp */
-	{ 0x10, 0xeb, 0x10, 0xf0, 0x10, 0xf0,},
-	{ 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,},
-};
-
-static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
-	{
-		/* S15.16 format */
-		0x00012A00, 0x00000000, 0x00019880,
-		0x00012A00, 0xFFFF9B80, 0xFFFF3000,
-		0x00012A00, 0x00020480, 0x00000000,
-		},
-	/* signed bias */
-	{ 0xffc0, 0xfe00, 0xfe00,},
-	{ 0x0, 0x0, 0x0,},
-	/* unsigned clamp */
-	{ 0x40, 0x3ac, 0x40, 0x3c0, 0x40, 0x3c0,},
-	{ 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
-};
-
 static const struct dpu_csc_cfg *_dpu_plane_get_csc(struct dpu_sw_pipe *pipe,
 						    const struct dpu_format *fmt)
 {
@@ -774,8 +742,8 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu,
 	min_src_size = DPU_FORMAT_IS_YUV(fmt) ? 2 : 1;
 
 	if (DPU_FORMAT_IS_YUV(fmt) &&
-	    (!(pipe->sspp->cap->features & DPU_SSPP_SCALER) ||
-	     !(pipe->sspp->cap->features & DPU_SSPP_CSC_ANY))) {
+	    (!pipe->sspp->cap->sblk->scaler_blk.len ||
+	     !pipe->sspp->cap->sblk->csc_blk.len)) {
 		DPU_DEBUG_PLANE(pdpu,
 				"plane doesn't have scaler/csc for yuv\n");
 		return -EINVAL;
@@ -824,6 +792,8 @@ static int dpu_plane_atomic_check(struct drm_plane *plane,
 										 plane);
 	int ret = 0, min_scale;
 	struct dpu_plane *pdpu = to_dpu_plane(plane);
+	struct dpu_kms *kms = _dpu_plane_get_kms(&pdpu->base);
+	u64 max_mdp_clk_rate = kms->perf.max_core_clk_rate;
 	struct dpu_plane_state *pstate = to_dpu_plane_state(new_plane_state);
 	struct dpu_sw_pipe *pipe = &pstate->pipe;
 	struct dpu_sw_pipe *r_pipe = &pstate->r_pipe;
@@ -892,14 +862,16 @@ static int dpu_plane_atomic_check(struct drm_plane *plane,
 
 	max_linewidth = pdpu->catalog->caps->max_linewidth;
 
-	if (drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) {
+	if ((drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) ||
+	     _dpu_plane_calc_clk(&crtc_state->adjusted_mode, pipe_cfg) > max_mdp_clk_rate) {
 		/*
 		 * In parallel multirect case only the half of the usual width
 		 * is supported for tiled formats. If we are here, we know that
 		 * full width is more than max_linewidth, thus each rect is
 		 * wider than allowed.
 		 */
-		if (DPU_FORMAT_IS_UBWC(fmt)) {
+		if (DPU_FORMAT_IS_UBWC(fmt) &&
+		    drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) {
 			DPU_DEBUG_PLANE(pdpu, "invalid src " DRM_RECT_FMT " line:%u, tiled format\n",
 					DRM_RECT_ARG(&pipe_cfg->src_rect), max_linewidth);
 			return -E2BIG;
@@ -1213,29 +1185,6 @@ static void dpu_plane_atomic_update(struct drm_plane *plane,
 	}
 }
 
-static void dpu_plane_destroy(struct drm_plane *plane)
-{
-	struct dpu_plane *pdpu = plane ? to_dpu_plane(plane) : NULL;
-	struct dpu_plane_state *pstate;
-
-	DPU_DEBUG_PLANE(pdpu, "\n");
-
-	if (pdpu) {
-		pstate = to_dpu_plane_state(plane->state);
-		_dpu_plane_set_qos_ctrl(plane, &pstate->pipe, false);
-
-		if (pstate->r_pipe.sspp)
-			_dpu_plane_set_qos_ctrl(plane, &pstate->r_pipe, false);
-
-		mutex_destroy(&pdpu->lock);
-
-		/* this will destroy the states as well */
-		drm_plane_cleanup(plane);
-
-		kfree(pdpu);
-	}
-}
-
 static void dpu_plane_destroy_state(struct drm_plane *plane,
 		struct drm_plane_state *state)
 {
@@ -1405,7 +1354,6 @@ static bool dpu_plane_format_mod_supported(struct drm_plane *plane,
 static const struct drm_plane_funcs dpu_plane_funcs = {
 		.update_plane = drm_atomic_helper_update_plane,
 		.disable_plane = drm_atomic_helper_disable_plane,
-		.destroy = dpu_plane_destroy,
 		.reset = dpu_plane_reset,
 		.atomic_duplicate_state = dpu_plane_duplicate_state,
 		.atomic_destroy_state = dpu_plane_destroy_state,
@@ -1433,35 +1381,28 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev,
 	struct dpu_hw_sspp *pipe_hw;
 	uint32_t num_formats;
 	uint32_t supported_rotations;
-	int ret = -EINVAL;
-
-	/* create and zero local structure */
-	pdpu = kzalloc(sizeof(*pdpu), GFP_KERNEL);
-	if (!pdpu) {
-		DPU_ERROR("[%u]failed to allocate local plane struct\n", pipe);
-		ret = -ENOMEM;
-		return ERR_PTR(ret);
-	}
-
-	/* cache local stuff for later */
-	plane = &pdpu->base;
-	pdpu->pipe = pipe;
+	int ret;
 
 	/* initialize underlying h/w driver */
 	pipe_hw = dpu_rm_get_sspp(&kms->rm, pipe);
 	if (!pipe_hw || !pipe_hw->cap || !pipe_hw->cap->sblk) {
 		DPU_ERROR("[%u]SSPP is invalid\n", pipe);
-		goto clean_plane;
+		return ERR_PTR(-EINVAL);
 	}
 
 	format_list = pipe_hw->cap->sblk->format_list;
 	num_formats = pipe_hw->cap->sblk->num_formats;
 
-	ret = drm_universal_plane_init(dev, plane, 0xff, &dpu_plane_funcs,
+	pdpu = drmm_universal_plane_alloc(dev, struct dpu_plane, base,
+				0xff, &dpu_plane_funcs,
 				format_list, num_formats,
 				supported_format_modifiers, type, NULL);
-	if (ret)
-		goto clean_plane;
+	if (IS_ERR(pdpu))
+		return ERR_CAST(pdpu);
+
+	/* cache local stuff for later */
+	plane = &pdpu->base;
+	pdpu->pipe = pipe;
 
 	pdpu->catalog = kms->catalog;
 
@@ -1488,13 +1429,7 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev,
 	/* success! finalize initialization */
 	drm_plane_helper_add(plane, &dpu_plane_helper_funcs);
 
-	mutex_init(&pdpu->lock);
-
 	DPU_DEBUG("%s created for pipe:%u id:%u\n", plane->name,
 					pipe, plane->base.id);
 	return plane;
-
-clean_plane:
-	kfree(pdpu);
-	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
index 8759466e2f37..b58a9c2ae326 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
@@ -8,6 +8,7 @@
 #include "dpu_kms.h"
 #include "dpu_hw_lm.h"
 #include "dpu_hw_ctl.h"
+#include "dpu_hw_cdm.h"
 #include "dpu_hw_pingpong.h"
 #include "dpu_hw_sspp.h"
 #include "dpu_hw_intf.h"
@@ -34,72 +35,8 @@ struct dpu_rm_requirements {
 	struct msm_display_topology topology;
 };
 
-int dpu_rm_destroy(struct dpu_rm *rm)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(rm->dspp_blks); i++) {
-		struct dpu_hw_dspp *hw;
-
-		if (rm->dspp_blks[i]) {
-			hw = to_dpu_hw_dspp(rm->dspp_blks[i]);
-			dpu_hw_dspp_destroy(hw);
-		}
-	}
-	for (i = 0; i < ARRAY_SIZE(rm->pingpong_blks); i++) {
-		struct dpu_hw_pingpong *hw;
-
-		if (rm->pingpong_blks[i]) {
-			hw = to_dpu_hw_pingpong(rm->pingpong_blks[i]);
-			dpu_hw_pingpong_destroy(hw);
-		}
-	}
-	for (i = 0; i < ARRAY_SIZE(rm->merge_3d_blks); i++) {
-		struct dpu_hw_merge_3d *hw;
-
-		if (rm->merge_3d_blks[i]) {
-			hw = to_dpu_hw_merge_3d(rm->merge_3d_blks[i]);
-			dpu_hw_merge_3d_destroy(hw);
-		}
-	}
-	for (i = 0; i < ARRAY_SIZE(rm->mixer_blks); i++) {
-		struct dpu_hw_mixer *hw;
-
-		if (rm->mixer_blks[i]) {
-			hw = to_dpu_hw_mixer(rm->mixer_blks[i]);
-			dpu_hw_lm_destroy(hw);
-		}
-	}
-	for (i = 0; i < ARRAY_SIZE(rm->ctl_blks); i++) {
-		struct dpu_hw_ctl *hw;
-
-		if (rm->ctl_blks[i]) {
-			hw = to_dpu_hw_ctl(rm->ctl_blks[i]);
-			dpu_hw_ctl_destroy(hw);
-		}
-	}
-	for (i = 0; i < ARRAY_SIZE(rm->hw_intf); i++)
-		dpu_hw_intf_destroy(rm->hw_intf[i]);
-
-	for (i = 0; i < ARRAY_SIZE(rm->dsc_blks); i++) {
-		struct dpu_hw_dsc *hw;
-
-		if (rm->dsc_blks[i]) {
-			hw = to_dpu_hw_dsc(rm->dsc_blks[i]);
-			dpu_hw_dsc_destroy(hw);
-		}
-	}
-
-	for (i = 0; i < ARRAY_SIZE(rm->hw_wb); i++)
-		dpu_hw_wb_destroy(rm->hw_wb[i]);
-
-	for (i = 0; i < ARRAY_SIZE(rm->hw_sspp); i++)
-		dpu_hw_sspp_destroy(rm->hw_sspp[i]);
-
-	return 0;
-}
-
-int dpu_rm_init(struct dpu_rm *rm,
+int dpu_rm_init(struct drm_device *dev,
+		struct dpu_rm *rm,
 		const struct dpu_mdss_cfg *cat,
 		const struct msm_mdss_data *mdss_data,
 		void __iomem *mmio)
@@ -119,7 +56,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_mixer *hw;
 		const struct dpu_lm_cfg *lm = &cat->mixer[i];
 
-		hw = dpu_hw_lm_init(lm, mmio);
+		hw = dpu_hw_lm_init(dev, lm, mmio);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed lm object creation: err %d\n", rc);
@@ -132,7 +69,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_merge_3d *hw;
 		const struct dpu_merge_3d_cfg *merge_3d = &cat->merge_3d[i];
 
-		hw = dpu_hw_merge_3d_init(merge_3d, mmio);
+		hw = dpu_hw_merge_3d_init(dev, merge_3d, mmio);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed merge_3d object creation: err %d\n",
@@ -146,7 +83,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_pingpong *hw;
 		const struct dpu_pingpong_cfg *pp = &cat->pingpong[i];
 
-		hw = dpu_hw_pingpong_init(pp, mmio, cat->mdss_ver);
+		hw = dpu_hw_pingpong_init(dev, pp, mmio, cat->mdss_ver);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed pingpong object creation: err %d\n",
@@ -162,7 +99,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_intf *hw;
 		const struct dpu_intf_cfg *intf = &cat->intf[i];
 
-		hw = dpu_hw_intf_init(intf, mmio, cat->mdss_ver);
+		hw = dpu_hw_intf_init(dev, intf, mmio, cat->mdss_ver);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed intf object creation: err %d\n", rc);
@@ -175,7 +112,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_wb *hw;
 		const struct dpu_wb_cfg *wb = &cat->wb[i];
 
-		hw = dpu_hw_wb_init(wb, mmio, cat->mdss_ver);
+		hw = dpu_hw_wb_init(dev, wb, mmio, cat->mdss_ver);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed wb object creation: err %d\n", rc);
@@ -188,7 +125,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_ctl *hw;
 		const struct dpu_ctl_cfg *ctl = &cat->ctl[i];
 
-		hw = dpu_hw_ctl_init(ctl, mmio, cat->mixer_count, cat->mixer);
+		hw = dpu_hw_ctl_init(dev, ctl, mmio, cat->mixer_count, cat->mixer);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed ctl object creation: err %d\n", rc);
@@ -201,7 +138,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_dspp *hw;
 		const struct dpu_dspp_cfg *dspp = &cat->dspp[i];
 
-		hw = dpu_hw_dspp_init(dspp, mmio);
+		hw = dpu_hw_dspp_init(dev, dspp, mmio);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed dspp object creation: err %d\n", rc);
@@ -215,9 +152,9 @@ int dpu_rm_init(struct dpu_rm *rm,
 		const struct dpu_dsc_cfg *dsc = &cat->dsc[i];
 
 		if (test_bit(DPU_DSC_HW_REV_1_2, &dsc->features))
-			hw = dpu_hw_dsc_init_1_2(dsc, mmio);
+			hw = dpu_hw_dsc_init_1_2(dev, dsc, mmio);
 		else
-			hw = dpu_hw_dsc_init(dsc, mmio);
+			hw = dpu_hw_dsc_init(dev, dsc, mmio);
 
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
@@ -231,7 +168,7 @@ int dpu_rm_init(struct dpu_rm *rm,
 		struct dpu_hw_sspp *hw;
 		const struct dpu_sspp_cfg *sspp = &cat->sspp[i];
 
-		hw = dpu_hw_sspp_init(sspp, mmio, mdss_data, cat->mdss_ver);
+		hw = dpu_hw_sspp_init(dev, sspp, mmio, mdss_data, cat->mdss_ver);
 		if (IS_ERR(hw)) {
 			rc = PTR_ERR(hw);
 			DPU_ERROR("failed sspp object creation: err %d\n", rc);
@@ -240,11 +177,21 @@ int dpu_rm_init(struct dpu_rm *rm,
 		rm->hw_sspp[sspp->id - SSPP_NONE] = hw;
 	}
 
+	if (cat->cdm) {
+		struct dpu_hw_cdm *hw;
+
+		hw = dpu_hw_cdm_init(dev, cat->cdm, mmio, cat->mdss_ver);
+		if (IS_ERR(hw)) {
+			rc = PTR_ERR(hw);
+			DPU_ERROR("failed cdm object creation: err %d\n", rc);
+			goto fail;
+		}
+		rm->cdm_blk = &hw->base;
+	}
+
 	return 0;
 
 fail:
-	dpu_rm_destroy(rm);
-
 	return rc ? rc : -EFAULT;
 }
 
@@ -488,6 +435,26 @@ static int _dpu_rm_reserve_dsc(struct dpu_rm *rm,
 	return 0;
 }
 
+static int _dpu_rm_reserve_cdm(struct dpu_rm *rm,
+			       struct dpu_global_state *global_state,
+			       struct drm_encoder *enc)
+{
+	/* try allocating only one CDM block */
+	if (!rm->cdm_blk) {
+		DPU_ERROR("CDM block does not exist\n");
+		return -EIO;
+	}
+
+	if (global_state->cdm_to_enc_id) {
+		DPU_ERROR("CDM_0 is already allocated\n");
+		return -EIO;
+	}
+
+	global_state->cdm_to_enc_id = enc->base.id;
+
+	return 0;
+}
+
 static int _dpu_rm_make_reservation(
 		struct dpu_rm *rm,
 		struct dpu_global_state *global_state,
@@ -513,6 +480,14 @@ static int _dpu_rm_make_reservation(
 	if (ret)
 		return ret;
 
+	if (reqs->topology.needs_cdm) {
+		ret = _dpu_rm_reserve_cdm(rm, global_state, enc);
+		if (ret) {
+			DPU_ERROR("unable to find CDM blk\n");
+			return ret;
+		}
+	}
+
 	return ret;
 }
 
@@ -523,9 +498,9 @@ static int _dpu_rm_populate_requirements(
 {
 	reqs->topology = req_topology;
 
-	DRM_DEBUG_KMS("num_lm: %d num_dsc: %d num_intf: %d\n",
+	DRM_DEBUG_KMS("num_lm: %d num_dsc: %d num_intf: %d cdm: %d\n",
 		      reqs->topology.num_lm, reqs->topology.num_dsc,
-		      reqs->topology.num_intf);
+		      reqs->topology.num_intf, reqs->topology.needs_cdm);
 
 	return 0;
 }
@@ -554,6 +529,7 @@ void dpu_rm_release(struct dpu_global_state *global_state,
 		ARRAY_SIZE(global_state->dsc_to_enc_id), enc->base.id);
 	_dpu_rm_clear_mapping(global_state->dspp_to_enc_id,
 		ARRAY_SIZE(global_state->dspp_to_enc_id), enc->base.id);
+	_dpu_rm_clear_mapping(&global_state->cdm_to_enc_id, 1, enc->base.id);
 }
 
 int dpu_rm_reserve(
@@ -627,6 +603,11 @@ int dpu_rm_get_assigned_resources(struct dpu_rm *rm,
 		hw_to_enc_id = global_state->dsc_to_enc_id;
 		max_blks = ARRAY_SIZE(rm->dsc_blks);
 		break;
+	case DPU_HW_BLK_CDM:
+		hw_blks = &rm->cdm_blk;
+		hw_to_enc_id = &global_state->cdm_to_enc_id;
+		max_blks = 1;
+		break;
 	default:
 		DPU_ERROR("blk type %d not managed by rm\n", type);
 		return 0;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h
index 2b551566cbf4..e3f83ebc656b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h
@@ -22,6 +22,7 @@ struct dpu_global_state;
  * @hw_wb: array of wb hardware resources
  * @dspp_blks: array of dspp hardware resources
  * @hw_sspp: array of sspp hardware resources
+ * @cdm_blk: cdm hardware resource
  */
 struct dpu_rm {
 	struct dpu_hw_blk *pingpong_blks[PINGPONG_MAX - PINGPONG_0];
@@ -33,30 +34,26 @@ struct dpu_rm {
 	struct dpu_hw_blk *merge_3d_blks[MERGE_3D_MAX - MERGE_3D_0];
 	struct dpu_hw_blk *dsc_blks[DSC_MAX - DSC_0];
 	struct dpu_hw_sspp *hw_sspp[SSPP_MAX - SSPP_NONE];
+	struct dpu_hw_blk *cdm_blk;
 };
 
 /**
  * dpu_rm_init - Read hardware catalog and create reservation tracking objects
  *	for all HW blocks.
+ * @dev:  Corresponding device for devres management
  * @rm: DPU Resource Manager handle
  * @cat: Pointer to hardware catalog
  * @mdss_data: Pointer to MDSS / UBWC configuration
  * @mmio: mapped register io address of MDP
  * @Return: 0 on Success otherwise -ERROR
  */
-int dpu_rm_init(struct dpu_rm *rm,
+int dpu_rm_init(struct drm_device *dev,
+		struct dpu_rm *rm,
 		const struct dpu_mdss_cfg *cat,
 		const struct msm_mdss_data *mdss_data,
 		void __iomem *mmio);
 
 /**
- * dpu_rm_destroy - Free all memory allocated by dpu_rm_init
- * @rm: DPU Resource Manager handle
- * @Return: 0 on Success otherwise -ERROR
- */
-int dpu_rm_destroy(struct dpu_rm *rm);
-
-/**
  * dpu_rm_reserve - Given a CRTC->Encoder->Connector display chain, analyze
  *	the use connections and user requirements, specified through related
  *	topology control properties, and reserve hardware blocks to that
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c
index 169f9de4a12a..75f93e346282 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c
@@ -6,6 +6,7 @@
 
 #include <drm/drm_crtc.h>
 #include <drm/drm_flip_work.h>
+#include <drm/drm_managed.h>
 #include <drm/drm_mode.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_vblank.h>
@@ -123,16 +124,6 @@ static void unref_cursor_worker(struct drm_flip_work *work, void *val)
 	drm_gem_object_put(val);
 }
 
-static void mdp4_crtc_destroy(struct drm_crtc *crtc)
-{
-	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
-
-	drm_crtc_cleanup(crtc);
-	drm_flip_work_cleanup(&mdp4_crtc->unref_cursor_work);
-
-	kfree(mdp4_crtc);
-}
-
 /* statically (for now) map planes to mixer stage (z-order): */
 static const int idxs[] = {
 		[VG1]  = 1,
@@ -269,6 +260,7 @@ static void mdp4_crtc_atomic_disable(struct drm_crtc *crtc,
 {
 	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
 	struct mdp4_kms *mdp4_kms = get_kms(crtc);
+	unsigned long flags;
 
 	DBG("%s", mdp4_crtc->name);
 
@@ -281,6 +273,14 @@ static void mdp4_crtc_atomic_disable(struct drm_crtc *crtc,
 	mdp_irq_unregister(&mdp4_kms->base, &mdp4_crtc->err);
 	mdp4_disable(mdp4_kms);
 
+	if (crtc->state->event && !crtc->state->active) {
+		WARN_ON(mdp4_crtc->event);
+		spin_lock_irqsave(&mdp4_kms->dev->event_lock, flags);
+		drm_crtc_send_vblank_event(crtc, crtc->state->event);
+		crtc->state->event = NULL;
+		spin_unlock_irqrestore(&mdp4_kms->dev->event_lock, flags);
+	}
+
 	mdp4_crtc->enabled = false;
 }
 
@@ -475,7 +475,6 @@ static int mdp4_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 
 static const struct drm_crtc_funcs mdp4_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
-	.destroy = mdp4_crtc_destroy,
 	.page_flip = drm_atomic_helper_page_flip,
 	.cursor_set = mdp4_crtc_cursor_set,
 	.cursor_move = mdp4_crtc_cursor_move,
@@ -616,6 +615,13 @@ static const char *dma_names[] = {
 		"DMA_P", "DMA_S", "DMA_E",
 };
 
+static void mdp4_crtc_flip_cleanup(struct drm_device *dev, void *ptr)
+{
+	struct mdp4_crtc *mdp4_crtc = ptr;
+
+	drm_flip_work_cleanup(&mdp4_crtc->unref_cursor_work);
+}
+
 /* initialize crtc */
 struct drm_crtc *mdp4_crtc_init(struct drm_device *dev,
 		struct drm_plane *plane, int id, int ovlp_id,
@@ -623,10 +629,13 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev,
 {
 	struct drm_crtc *crtc = NULL;
 	struct mdp4_crtc *mdp4_crtc;
+	int ret;
 
-	mdp4_crtc = kzalloc(sizeof(*mdp4_crtc), GFP_KERNEL);
-	if (!mdp4_crtc)
-		return ERR_PTR(-ENOMEM);
+	mdp4_crtc = drmm_crtc_alloc_with_planes(dev, struct mdp4_crtc, base,
+						plane, NULL,
+						&mdp4_crtc_funcs, NULL);
+	if (IS_ERR(mdp4_crtc))
+		return ERR_CAST(mdp4_crtc);
 
 	crtc = &mdp4_crtc->base;
 
@@ -648,9 +657,10 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev,
 
 	drm_flip_work_init(&mdp4_crtc->unref_cursor_work,
 			"unref cursor", unref_cursor_worker);
+	ret = drmm_add_action_or_reset(dev, mdp4_crtc_flip_cleanup, mdp4_crtc);
+	if (ret)
+		return ERR_PTR(ret);
 
-	drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp4_crtc_funcs,
-				  NULL);
 	drm_crtc_helper_add(crtc, &mdp4_crtc_helper_funcs);
 
 	return crtc;
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c
index 39b8fe53c29d..74dafe7106be 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c
@@ -26,18 +26,6 @@ static struct mdp4_kms *get_kms(struct drm_encoder *encoder)
 	return to_mdp4_kms(to_mdp_kms(priv->kms));
 }
 
-static void mdp4_dsi_encoder_destroy(struct drm_encoder *encoder)
-{
-	struct mdp4_dsi_encoder *mdp4_dsi_encoder = to_mdp4_dsi_encoder(encoder);
-
-	drm_encoder_cleanup(encoder);
-	kfree(mdp4_dsi_encoder);
-}
-
-static const struct drm_encoder_funcs mdp4_dsi_encoder_funcs = {
-	.destroy = mdp4_dsi_encoder_destroy,
-};
-
 static void mdp4_dsi_encoder_mode_set(struct drm_encoder *encoder,
 				      struct drm_display_mode *mode,
 				      struct drm_display_mode *adjusted_mode)
@@ -148,28 +136,18 @@ static const struct drm_encoder_helper_funcs mdp4_dsi_encoder_helper_funcs = {
 /* initialize encoder */
 struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev)
 {
-	struct drm_encoder *encoder = NULL;
+	struct drm_encoder *encoder;
 	struct mdp4_dsi_encoder *mdp4_dsi_encoder;
-	int ret;
 
-	mdp4_dsi_encoder = kzalloc(sizeof(*mdp4_dsi_encoder), GFP_KERNEL);
-	if (!mdp4_dsi_encoder) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	mdp4_dsi_encoder = drmm_encoder_alloc(dev, struct mdp4_dsi_encoder, base,
+					      NULL, DRM_MODE_ENCODER_DSI, NULL);
+	if (IS_ERR(mdp4_dsi_encoder))
+		return ERR_CAST(mdp4_dsi_encoder);
 
 	encoder = &mdp4_dsi_encoder->base;
 
-	drm_encoder_init(dev, encoder, &mdp4_dsi_encoder_funcs,
-			 DRM_MODE_ENCODER_DSI, NULL);
 	drm_encoder_helper_add(encoder, &mdp4_dsi_encoder_helper_funcs);
 
 	return encoder;
-
-fail:
-	if (encoder)
-		mdp4_dsi_encoder_destroy(encoder);
-
-	return ERR_PTR(ret);
 }
 #endif /* CONFIG_DRM_MSM_DSI */
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c
index 88645dbc3785..3b70764b48c4 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c
@@ -25,17 +25,6 @@ static struct mdp4_kms *get_kms(struct drm_encoder *encoder)
 	return to_mdp4_kms(to_mdp_kms(priv->kms));
 }
 
-static void mdp4_dtv_encoder_destroy(struct drm_encoder *encoder)
-{
-	struct mdp4_dtv_encoder *mdp4_dtv_encoder = to_mdp4_dtv_encoder(encoder);
-	drm_encoder_cleanup(encoder);
-	kfree(mdp4_dtv_encoder);
-}
-
-static const struct drm_encoder_funcs mdp4_dtv_encoder_funcs = {
-	.destroy = mdp4_dtv_encoder_destroy,
-};
-
 static void mdp4_dtv_encoder_mode_set(struct drm_encoder *encoder,
 		struct drm_display_mode *mode,
 		struct drm_display_mode *adjusted_mode)
@@ -173,41 +162,29 @@ long mdp4_dtv_round_pixclk(struct drm_encoder *encoder, unsigned long rate)
 /* initialize encoder */
 struct drm_encoder *mdp4_dtv_encoder_init(struct drm_device *dev)
 {
-	struct drm_encoder *encoder = NULL;
+	struct drm_encoder *encoder;
 	struct mdp4_dtv_encoder *mdp4_dtv_encoder;
-	int ret;
 
-	mdp4_dtv_encoder = kzalloc(sizeof(*mdp4_dtv_encoder), GFP_KERNEL);
-	if (!mdp4_dtv_encoder) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	mdp4_dtv_encoder = drmm_encoder_alloc(dev, struct mdp4_dtv_encoder, base,
+					      NULL, DRM_MODE_ENCODER_TMDS, NULL);
+	if (IS_ERR(mdp4_dtv_encoder))
+		return ERR_CAST(mdp4_dtv_encoder);
 
 	encoder = &mdp4_dtv_encoder->base;
 
-	drm_encoder_init(dev, encoder, &mdp4_dtv_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS, NULL);
 	drm_encoder_helper_add(encoder, &mdp4_dtv_encoder_helper_funcs);
 
 	mdp4_dtv_encoder->hdmi_clk = devm_clk_get(dev->dev, "hdmi_clk");
 	if (IS_ERR(mdp4_dtv_encoder->hdmi_clk)) {
 		DRM_DEV_ERROR(dev->dev, "failed to get hdmi_clk\n");
-		ret = PTR_ERR(mdp4_dtv_encoder->hdmi_clk);
-		goto fail;
+		return ERR_CAST(mdp4_dtv_encoder->hdmi_clk);
 	}
 
 	mdp4_dtv_encoder->mdp_clk = devm_clk_get(dev->dev, "tv_clk");
 	if (IS_ERR(mdp4_dtv_encoder->mdp_clk)) {
 		DRM_DEV_ERROR(dev->dev, "failed to get tv_clk\n");
-		ret = PTR_ERR(mdp4_dtv_encoder->mdp_clk);
-		goto fail;
+		return ERR_CAST(mdp4_dtv_encoder->mdp_clk);
 	}
 
 	return encoder;
-
-fail:
-	if (encoder)
-		mdp4_dtv_encoder_destroy(encoder);
-
-	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
index 10eb3e5b218e..576995ddce37 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
@@ -18,7 +18,7 @@ struct mdp4_lcdc_encoder {
 	struct drm_panel *panel;
 	struct clk *lcdc_clk;
 	unsigned long int pixclock;
-	struct regulator *regs[3];
+	struct regulator_bulk_data regs[3];
 	bool enabled;
 	uint32_t bsc;
 };
@@ -30,18 +30,6 @@ static struct mdp4_kms *get_kms(struct drm_encoder *encoder)
 	return to_mdp4_kms(to_mdp_kms(priv->kms));
 }
 
-static void mdp4_lcdc_encoder_destroy(struct drm_encoder *encoder)
-{
-	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder =
-			to_mdp4_lcdc_encoder(encoder);
-	drm_encoder_cleanup(encoder);
-	kfree(mdp4_lcdc_encoder);
-}
-
-static const struct drm_encoder_funcs mdp4_lcdc_encoder_funcs = {
-	.destroy = mdp4_lcdc_encoder_destroy,
-};
-
 /* this should probably be a helper: */
 static struct drm_connector *get_connector(struct drm_encoder *encoder)
 {
@@ -271,12 +259,10 @@ static void mdp4_lcdc_encoder_mode_set(struct drm_encoder *encoder,
 
 static void mdp4_lcdc_encoder_disable(struct drm_encoder *encoder)
 {
-	struct drm_device *dev = encoder->dev;
 	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder =
 			to_mdp4_lcdc_encoder(encoder);
 	struct mdp4_kms *mdp4_kms = get_kms(encoder);
 	struct drm_panel *panel;
-	int i, ret;
 
 	if (WARN_ON(!mdp4_lcdc_encoder->enabled))
 		return;
@@ -301,11 +287,8 @@ static void mdp4_lcdc_encoder_disable(struct drm_encoder *encoder)
 
 	clk_disable_unprepare(mdp4_lcdc_encoder->lcdc_clk);
 
-	for (i = 0; i < ARRAY_SIZE(mdp4_lcdc_encoder->regs); i++) {
-		ret = regulator_disable(mdp4_lcdc_encoder->regs[i]);
-		if (ret)
-			DRM_DEV_ERROR(dev->dev, "failed to disable regulator: %d\n", ret);
-	}
+	regulator_bulk_disable(ARRAY_SIZE(mdp4_lcdc_encoder->regs),
+			       mdp4_lcdc_encoder->regs);
 
 	mdp4_lcdc_encoder->enabled = false;
 }
@@ -319,7 +302,7 @@ static void mdp4_lcdc_encoder_enable(struct drm_encoder *encoder)
 	struct mdp4_kms *mdp4_kms = get_kms(encoder);
 	struct drm_panel *panel;
 	uint32_t config;
-	int i, ret;
+	int ret;
 
 	if (WARN_ON(mdp4_lcdc_encoder->enabled))
 		return;
@@ -339,11 +322,10 @@ static void mdp4_lcdc_encoder_enable(struct drm_encoder *encoder)
 	mdp4_crtc_set_config(encoder->crtc, config);
 	mdp4_crtc_set_intf(encoder->crtc, INTF_LCDC_DTV, 0);
 
-	for (i = 0; i < ARRAY_SIZE(mdp4_lcdc_encoder->regs); i++) {
-		ret = regulator_enable(mdp4_lcdc_encoder->regs[i]);
-		if (ret)
-			DRM_DEV_ERROR(dev->dev, "failed to enable regulator: %d\n", ret);
-	}
+	ret = regulator_bulk_enable(ARRAY_SIZE(mdp4_lcdc_encoder->regs),
+				    mdp4_lcdc_encoder->regs);
+	if (ret)
+		DRM_DEV_ERROR(dev->dev, "failed to enable regulators: %d\n", ret);
 
 	DBG("setting lcdc_clk=%lu", pc);
 	ret = clk_set_rate(mdp4_lcdc_encoder->lcdc_clk, pc);
@@ -383,63 +365,38 @@ long mdp4_lcdc_round_pixclk(struct drm_encoder *encoder, unsigned long rate)
 struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev,
 		struct device_node *panel_node)
 {
-	struct drm_encoder *encoder = NULL;
+	struct drm_encoder *encoder;
 	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder;
-	struct regulator *reg;
 	int ret;
 
-	mdp4_lcdc_encoder = kzalloc(sizeof(*mdp4_lcdc_encoder), GFP_KERNEL);
-	if (!mdp4_lcdc_encoder) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	mdp4_lcdc_encoder = drmm_encoder_alloc(dev, struct mdp4_lcdc_encoder, base,
+					       NULL, DRM_MODE_ENCODER_LVDS, NULL);
+	if (IS_ERR(mdp4_lcdc_encoder))
+		return ERR_CAST(mdp4_lcdc_encoder);
 
 	mdp4_lcdc_encoder->panel_node = panel_node;
 
 	encoder = &mdp4_lcdc_encoder->base;
 
-	drm_encoder_init(dev, encoder, &mdp4_lcdc_encoder_funcs,
-			 DRM_MODE_ENCODER_LVDS, NULL);
 	drm_encoder_helper_add(encoder, &mdp4_lcdc_encoder_helper_funcs);
 
 	/* TODO: do we need different pll in other cases? */
 	mdp4_lcdc_encoder->lcdc_clk = mpd4_lvds_pll_init(dev);
 	if (IS_ERR(mdp4_lcdc_encoder->lcdc_clk)) {
 		DRM_DEV_ERROR(dev->dev, "failed to get lvds_clk\n");
-		ret = PTR_ERR(mdp4_lcdc_encoder->lcdc_clk);
-		goto fail;
+		return ERR_CAST(mdp4_lcdc_encoder->lcdc_clk);
 	}
 
 	/* TODO: different regulators in other cases? */
-	reg = devm_regulator_get(dev->dev, "lvds-vccs-3p3v");
-	if (IS_ERR(reg)) {
-		ret = PTR_ERR(reg);
-		DRM_DEV_ERROR(dev->dev, "failed to get lvds-vccs-3p3v: %d\n", ret);
-		goto fail;
-	}
-	mdp4_lcdc_encoder->regs[0] = reg;
-
-	reg = devm_regulator_get(dev->dev, "lvds-pll-vdda");
-	if (IS_ERR(reg)) {
-		ret = PTR_ERR(reg);
-		DRM_DEV_ERROR(dev->dev, "failed to get lvds-pll-vdda: %d\n", ret);
-		goto fail;
-	}
-	mdp4_lcdc_encoder->regs[1] = reg;
+	mdp4_lcdc_encoder->regs[0].supply = "lvds-vccs-3p3v";
+	mdp4_lcdc_encoder->regs[1].supply = "lvds-vccs-3p3v";
+	mdp4_lcdc_encoder->regs[2].supply = "lvds-vdda";
 
-	reg = devm_regulator_get(dev->dev, "lvds-vdda");
-	if (IS_ERR(reg)) {
-		ret = PTR_ERR(reg);
-		DRM_DEV_ERROR(dev->dev, "failed to get lvds-vdda: %d\n", ret);
-		goto fail;
-	}
-	mdp4_lcdc_encoder->regs[2] = reg;
+	ret = devm_regulator_bulk_get(dev->dev,
+				      ARRAY_SIZE(mdp4_lcdc_encoder->regs),
+				      mdp4_lcdc_encoder->regs);
+	if (ret)
+		return ERR_PTR(ret);
 
 	return encoder;
-
-fail:
-	if (encoder)
-		mdp4_lcdc_encoder_destroy(encoder);
-
-	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
index 694d54341337..c5179e4c393c 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
@@ -1350,23 +1350,17 @@ int mdp5_cfg_get_hw_rev(struct mdp5_cfg_handler *cfg_handler)
 	return cfg_handler->revision;
 }
 
-void mdp5_cfg_destroy(struct mdp5_cfg_handler *cfg_handler)
-{
-	kfree(cfg_handler);
-}
-
 struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms,
 		uint32_t major, uint32_t minor)
 {
 	struct drm_device *dev = mdp5_kms->dev;
 	struct mdp5_cfg_handler *cfg_handler;
 	const struct mdp5_cfg_handler *cfg_handlers;
-	int i, ret = 0, num_handlers;
+	int i, num_handlers;
 
-	cfg_handler = kzalloc(sizeof(*cfg_handler), GFP_KERNEL);
+	cfg_handler = devm_kzalloc(dev->dev, sizeof(*cfg_handler), GFP_KERNEL);
 	if (unlikely(!cfg_handler)) {
-		ret = -ENOMEM;
-		goto fail;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	switch (major) {
@@ -1381,8 +1375,7 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms,
 	default:
 		DRM_DEV_ERROR(dev->dev, "unexpected MDP major version: v%d.%d\n",
 				major, minor);
-		ret = -ENXIO;
-		goto fail;
+		return ERR_PTR(-ENXIO);
 	}
 
 	/* only after mdp5_cfg global pointer's init can we access the hw */
@@ -1396,8 +1389,7 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms,
 	if (unlikely(!mdp5_cfg)) {
 		DRM_DEV_ERROR(dev->dev, "unexpected MDP minor revision: v%d.%d\n",
 				major, minor);
-		ret = -ENXIO;
-		goto fail;
+		return ERR_PTR(-ENXIO);
 	}
 
 	cfg_handler->revision = minor;
@@ -1406,10 +1398,4 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms,
 	DBG("MDP5: %s hw config selected", mdp5_cfg->name);
 
 	return cfg_handler;
-
-fail:
-	if (cfg_handler)
-		mdp5_cfg_destroy(cfg_handler);
-
-	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h
index c2502cc33864..26c5d8b4ab46 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h
@@ -121,6 +121,5 @@ int mdp5_cfg_get_hw_rev(struct mdp5_cfg_handler *cfg_hnd);
 
 struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms,
 		uint32_t major, uint32_t minor);
-void mdp5_cfg_destroy(struct mdp5_cfg_handler *cfg_hnd);
 
 #endif /* __MDP5_CFG_H__ */
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
index 86036dd4e1e8..4a3db2ea1689 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
@@ -13,6 +13,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_flip_work.h>
 #include <drm/drm_fourcc.h>
+#include <drm/drm_managed.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_vblank.h>
 
@@ -172,14 +173,11 @@ static void unref_cursor_worker(struct drm_flip_work *work, void *val)
 	drm_gem_object_put(val);
 }
 
-static void mdp5_crtc_destroy(struct drm_crtc *crtc)
+static void mdp5_crtc_flip_cleanup(struct drm_device *dev, void *ptr)
 {
-	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc *mdp5_crtc = ptr;
 
-	drm_crtc_cleanup(crtc);
 	drm_flip_work_cleanup(&mdp5_crtc->unref_cursor_work);
-
-	kfree(mdp5_crtc);
 }
 
 static inline u32 mdp5_lm_use_fg_alpha_mask(enum mdp_mixer_stage_id stage)
@@ -1147,7 +1145,6 @@ static void mdp5_crtc_reset(struct drm_crtc *crtc)
 
 static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = {
 	.set_config = drm_atomic_helper_set_config,
-	.destroy = mdp5_crtc_destroy,
 	.page_flip = drm_atomic_helper_page_flip,
 	.reset = mdp5_crtc_reset,
 	.atomic_duplicate_state = mdp5_crtc_duplicate_state,
@@ -1161,7 +1158,6 @@ static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = {
 
 static const struct drm_crtc_funcs mdp5_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
-	.destroy = mdp5_crtc_destroy,
 	.page_flip = drm_atomic_helper_page_flip,
 	.reset = mdp5_crtc_reset,
 	.atomic_duplicate_state = mdp5_crtc_duplicate_state,
@@ -1327,10 +1323,16 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev,
 {
 	struct drm_crtc *crtc = NULL;
 	struct mdp5_crtc *mdp5_crtc;
+	int ret;
 
-	mdp5_crtc = kzalloc(sizeof(*mdp5_crtc), GFP_KERNEL);
-	if (!mdp5_crtc)
-		return ERR_PTR(-ENOMEM);
+	mdp5_crtc = drmm_crtc_alloc_with_planes(dev, struct mdp5_crtc, base,
+						plane, cursor_plane,
+						cursor_plane ?
+						&mdp5_crtc_no_lm_cursor_funcs :
+						&mdp5_crtc_funcs,
+						NULL);
+	if (IS_ERR(mdp5_crtc))
+		return ERR_CAST(mdp5_crtc);
 
 	crtc = &mdp5_crtc->base;
 
@@ -1346,13 +1348,11 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev,
 
 	mdp5_crtc->lm_cursor_enabled = cursor_plane ? false : true;
 
-	drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane,
-				  cursor_plane ?
-				  &mdp5_crtc_no_lm_cursor_funcs :
-				  &mdp5_crtc_funcs, NULL);
-
 	drm_flip_work_init(&mdp5_crtc->unref_cursor_work,
 			"unref cursor", unref_cursor_worker);
+	ret = drmm_add_action_or_reset(dev, mdp5_crtc_flip_cleanup, mdp5_crtc);
+	if (ret)
+		return ERR_PTR(ret);
 
 	drm_crtc_helper_add(crtc, &mdp5_crtc_helper_funcs);
 
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c
index 1220f2b20e05..666de99a46a5 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c
@@ -681,11 +681,6 @@ void mdp5_ctlm_hw_reset(struct mdp5_ctl_manager *ctl_mgr)
 	}
 }
 
-void mdp5_ctlm_destroy(struct mdp5_ctl_manager *ctl_mgr)
-{
-	kfree(ctl_mgr);
-}
-
 struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev,
 		void __iomem *mmio_base, struct mdp5_cfg_handler *cfg_hnd)
 {
@@ -697,18 +692,16 @@ struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev,
 	unsigned long flags;
 	int c, ret;
 
-	ctl_mgr = kzalloc(sizeof(*ctl_mgr), GFP_KERNEL);
+	ctl_mgr = devm_kzalloc(dev->dev, sizeof(*ctl_mgr), GFP_KERNEL);
 	if (!ctl_mgr) {
 		DRM_DEV_ERROR(dev->dev, "failed to allocate CTL manager\n");
-		ret = -ENOMEM;
-		goto fail;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	if (WARN_ON(ctl_cfg->count > MAX_CTL)) {
 		DRM_DEV_ERROR(dev->dev, "Increase static pool size to at least %d\n",
 				ctl_cfg->count);
-		ret = -ENOSPC;
-		goto fail;
+		return ERR_PTR(-ENOSPC);
 	}
 
 	/* initialize the CTL manager: */
@@ -727,7 +720,7 @@ struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev,
 			DRM_DEV_ERROR(dev->dev, "CTL_%d: base is null!\n", c);
 			ret = -EINVAL;
 			spin_unlock_irqrestore(&ctl_mgr->pool_lock, flags);
-			goto fail;
+			return ERR_PTR(ret);
 		}
 		ctl->ctlm = ctl_mgr;
 		ctl->id = c;
@@ -755,10 +748,4 @@ struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev,
 	DBG("Pool of %d CTLs created.", ctl_mgr->nctl);
 
 	return ctl_mgr;
-
-fail:
-	if (ctl_mgr)
-		mdp5_ctlm_destroy(ctl_mgr);
-
-	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h
index c2af68aa77ae..9020e8efc4e4 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h
@@ -17,7 +17,6 @@ struct mdp5_ctl_manager;
 struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev,
 		void __iomem *mmio_base, struct mdp5_cfg_handler *cfg_hnd);
 void mdp5_ctlm_hw_reset(struct mdp5_ctl_manager *ctlm);
-void mdp5_ctlm_destroy(struct mdp5_ctl_manager *ctlm);
 
 /*
  * CTL prototypes:
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c
index 79d67c495780..8db97083e14d 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c
@@ -16,17 +16,6 @@ static struct mdp5_kms *get_kms(struct drm_encoder *encoder)
 	return to_mdp5_kms(to_mdp_kms(priv->kms));
 }
 
-static void mdp5_encoder_destroy(struct drm_encoder *encoder)
-{
-	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
-	drm_encoder_cleanup(encoder);
-	kfree(mdp5_encoder);
-}
-
-static const struct drm_encoder_funcs mdp5_encoder_funcs = {
-	.destroy = mdp5_encoder_destroy,
-};
-
 static void mdp5_vid_encoder_mode_set(struct drm_encoder *encoder,
 				      struct drm_display_mode *mode,
 				      struct drm_display_mode *adjusted_mode)
@@ -342,13 +331,11 @@ struct drm_encoder *mdp5_encoder_init(struct drm_device *dev,
 	struct mdp5_encoder *mdp5_encoder;
 	int enc_type = (intf->type == INTF_DSI) ?
 		DRM_MODE_ENCODER_DSI : DRM_MODE_ENCODER_TMDS;
-	int ret;
 
-	mdp5_encoder = kzalloc(sizeof(*mdp5_encoder), GFP_KERNEL);
-	if (!mdp5_encoder) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	mdp5_encoder = drmm_encoder_alloc(dev, struct mdp5_encoder, base,
+					  NULL, enc_type, NULL);
+	if (IS_ERR(mdp5_encoder))
+		return ERR_CAST(mdp5_encoder);
 
 	encoder = &mdp5_encoder->base;
 	mdp5_encoder->ctl = ctl;
@@ -356,15 +343,7 @@ struct drm_encoder *mdp5_encoder_init(struct drm_device *dev,
 
 	spin_lock_init(&mdp5_encoder->intf_lock);
 
-	drm_encoder_init(dev, encoder, &mdp5_encoder_funcs, enc_type, NULL);
-
 	drm_encoder_helper_add(encoder, &mdp5_encoder_helper_funcs);
 
 	return encoder;
-
-fail:
-	if (encoder)
-		mdp5_encoder_destroy(encoder);
-
-	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index ec933d597e20..0827634664ae 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -209,13 +209,6 @@ static void mdp5_kms_destroy(struct msm_kms *kms)
 {
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
 	struct msm_gem_address_space *aspace = kms->aspace;
-	int i;
-
-	for (i = 0; i < mdp5_kms->num_hwmixers; i++)
-		mdp5_mixer_destroy(mdp5_kms->hwmixers[i]);
-
-	for (i = 0; i < mdp5_kms->num_hwpipes; i++)
-		mdp5_pipe_destroy(mdp5_kms->hwpipes[i]);
 
 	if (aspace) {
 		aspace->mmu->funcs->detach(aspace->mmu);
@@ -623,18 +616,6 @@ fail:
 
 static void mdp5_destroy(struct mdp5_kms *mdp5_kms)
 {
-	int i;
-
-	if (mdp5_kms->ctlm)
-		mdp5_ctlm_destroy(mdp5_kms->ctlm);
-	if (mdp5_kms->smp)
-		mdp5_smp_destroy(mdp5_kms->smp);
-	if (mdp5_kms->cfg)
-		mdp5_cfg_destroy(mdp5_kms->cfg);
-
-	for (i = 0; i < mdp5_kms->num_intfs; i++)
-		kfree(mdp5_kms->intfs[i]);
-
 	if (mdp5_kms->rpm_enabled)
 		pm_runtime_disable(&mdp5_kms->pdev->dev);
 
@@ -652,7 +633,7 @@ static int construct_pipes(struct mdp5_kms *mdp5_kms, int cnt,
 	for (i = 0; i < cnt; i++) {
 		struct mdp5_hw_pipe *hwpipe;
 
-		hwpipe = mdp5_pipe_init(pipes[i], offsets[i], caps);
+		hwpipe = mdp5_pipe_init(dev, pipes[i], offsets[i], caps);
 		if (IS_ERR(hwpipe)) {
 			ret = PTR_ERR(hwpipe);
 			DRM_DEV_ERROR(dev->dev, "failed to construct pipe for %s (%d)\n",
@@ -724,7 +705,7 @@ static int hwmixer_init(struct mdp5_kms *mdp5_kms)
 	for (i = 0; i < hw_cfg->lm.count; i++) {
 		struct mdp5_hw_mixer *mixer;
 
-		mixer = mdp5_mixer_init(&hw_cfg->lm.instances[i]);
+		mixer = mdp5_mixer_init(dev, &hw_cfg->lm.instances[i]);
 		if (IS_ERR(mixer)) {
 			ret = PTR_ERR(mixer);
 			DRM_DEV_ERROR(dev->dev, "failed to construct LM%d (%d)\n",
@@ -755,7 +736,7 @@ static int interface_init(struct mdp5_kms *mdp5_kms)
 		if (intf_types[i] == INTF_DISABLED)
 			continue;
 
-		intf = kzalloc(sizeof(*intf), GFP_KERNEL);
+		intf = devm_kzalloc(dev->dev, sizeof(*intf), GFP_KERNEL);
 		if (!intf) {
 			DRM_DEV_ERROR(dev->dev, "failed to construct INTF%d\n", i);
 			return -ENOMEM;
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c
index 2536def2a000..2822b533f807 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c
@@ -140,20 +140,16 @@ int mdp5_mixer_release(struct drm_atomic_state *s, struct mdp5_hw_mixer *mixer)
 	return 0;
 }
 
-void mdp5_mixer_destroy(struct mdp5_hw_mixer *mixer)
-{
-	kfree(mixer);
-}
-
 static const char * const mixer_names[] = {
 	"LM0", "LM1", "LM2", "LM3", "LM4", "LM5",
 };
 
-struct mdp5_hw_mixer *mdp5_mixer_init(const struct mdp5_lm_instance *lm)
+struct mdp5_hw_mixer *mdp5_mixer_init(struct drm_device *dev,
+				      const struct mdp5_lm_instance *lm)
 {
 	struct mdp5_hw_mixer *mixer;
 
-	mixer = kzalloc(sizeof(*mixer), GFP_KERNEL);
+	mixer = devm_kzalloc(dev->dev, sizeof(*mixer), GFP_KERNEL);
 	if (!mixer)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h
index 545ee223b9d7..2bedd75835bc 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h
@@ -25,8 +25,8 @@ struct mdp5_hw_mixer_state {
 	struct drm_crtc *hwmixer_to_crtc[8];
 };
 
-struct mdp5_hw_mixer *mdp5_mixer_init(const struct mdp5_lm_instance *lm);
-void mdp5_mixer_destroy(struct mdp5_hw_mixer *lm);
+struct mdp5_hw_mixer *mdp5_mixer_init(struct drm_device *dev,
+				      const struct mdp5_lm_instance *lm);
 int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc,
 		      uint32_t caps, struct mdp5_hw_mixer **mixer,
 		      struct mdp5_hw_mixer **r_mixer);
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c
index e4b8a789835a..99b2c30b1d48 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c
@@ -151,17 +151,13 @@ int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe)
 	return 0;
 }
 
-void mdp5_pipe_destroy(struct mdp5_hw_pipe *hwpipe)
-{
-	kfree(hwpipe);
-}
-
-struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe,
+struct mdp5_hw_pipe *mdp5_pipe_init(struct drm_device *dev,
+		enum mdp5_pipe pipe,
 		uint32_t reg_offset, uint32_t caps)
 {
 	struct mdp5_hw_pipe *hwpipe;
 
-	hwpipe = kzalloc(sizeof(*hwpipe), GFP_KERNEL);
+	hwpipe = devm_kzalloc(dev->dev, sizeof(*hwpipe), GFP_KERNEL);
 	if (!hwpipe)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h
index cca67938cab2..452138821f60 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h
@@ -39,8 +39,8 @@ int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane,
 		     struct mdp5_hw_pipe **r_hwpipe);
 int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe);
 
-struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe,
+struct mdp5_hw_pipe *mdp5_pipe_init(struct drm_device *dev,
+		enum mdp5_pipe pipe,
 		uint32_t reg_offset, uint32_t caps);
-void mdp5_pipe_destroy(struct mdp5_hw_pipe *hwpipe);
 
 #endif /* __MDP5_PIPE_H__ */
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
index b68682c1b5bc..8b59562e29e2 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
@@ -370,23 +370,17 @@ void mdp5_smp_dump(struct mdp5_smp *smp, struct drm_printer *p)
 		drm_modeset_unlock(&mdp5_kms->glob_state_lock);
 }
 
-void mdp5_smp_destroy(struct mdp5_smp *smp)
-{
-	kfree(smp);
-}
 
 struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms, const struct mdp5_smp_block *cfg)
 {
+	struct drm_device *dev = mdp5_kms->dev;
 	struct mdp5_smp_state *state;
 	struct mdp5_global_state *global_state;
 	struct mdp5_smp *smp;
-	int ret;
 
-	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
-	if (unlikely(!smp)) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	smp = devm_kzalloc(dev->dev, sizeof(*smp), GFP_KERNEL);
+	if (unlikely(!smp))
+		return ERR_PTR(-ENOMEM);
 
 	smp->dev = mdp5_kms->dev;
 	smp->blk_cnt = cfg->mmb_count;
@@ -400,9 +394,4 @@ struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms, const struct mdp5_smp_
 	memcpy(smp->reserved, cfg->reserved, sizeof(smp->reserved));
 
 	return smp;
-fail:
-	if (smp)
-		mdp5_smp_destroy(smp);
-
-	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h
index ba5618e136c3..d8b6a11413d9 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h
@@ -68,7 +68,6 @@ struct mdp5_smp;
 
 struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms,
 		const struct mdp5_smp_block *cfg);
-void  mdp5_smp_destroy(struct mdp5_smp *smp);
 
 void mdp5_smp_dump(struct mdp5_smp *smp, struct drm_printer *p);
 
diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c
index 8e3b677f35e6..03f4951c49f4 100644
--- a/drivers/gpu/drm/msm/dp/dp_aux.c
+++ b/drivers/gpu/drm/msm/dp/dp_aux.c
@@ -291,6 +291,10 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux,
 		return -EINVAL;
 	}
 
+	ret = pm_runtime_resume_and_get(dp_aux->dev);
+	if (ret)
+		return  ret;
+
 	mutex_lock(&aux->mutex);
 	if (!aux->initted) {
 		ret = -EIO;
@@ -364,6 +368,7 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux,
 
 exit:
 	mutex_unlock(&aux->mutex);
+	pm_runtime_put_sync(dp_aux->dev);
 
 	return ret;
 }
@@ -474,7 +479,6 @@ void dp_aux_deinit(struct drm_dp_aux *dp_aux)
 
 int dp_aux_register(struct drm_dp_aux *dp_aux)
 {
-	struct dp_aux_private *aux;
 	int ret;
 
 	if (!dp_aux) {
@@ -482,12 +486,7 @@ int dp_aux_register(struct drm_dp_aux *dp_aux)
 		return -EINVAL;
 	}
 
-	aux = container_of(dp_aux, struct dp_aux_private, dp_aux);
-
-	aux->dp_aux.name = "dpu_dp_aux";
-	aux->dp_aux.dev = aux->dev;
-	aux->dp_aux.transfer = dp_aux_transfer;
-	ret = drm_dp_aux_register(&aux->dp_aux);
+	ret = drm_dp_aux_register(dp_aux);
 	if (ret) {
 		DRM_ERROR("%s: failed to register drm aux: %d\n", __func__,
 				ret);
@@ -502,6 +501,21 @@ void dp_aux_unregister(struct drm_dp_aux *dp_aux)
 	drm_dp_aux_unregister(dp_aux);
 }
 
+static int dp_wait_hpd_asserted(struct drm_dp_aux *dp_aux,
+				 unsigned long wait_us)
+{
+	int ret;
+	struct dp_aux_private *aux;
+
+	aux = container_of(dp_aux, struct dp_aux_private, dp_aux);
+
+	pm_runtime_get_sync(aux->dev);
+	ret = dp_catalog_aux_wait_for_hpd_connect_state(aux->catalog);
+	pm_runtime_put_sync(aux->dev);
+
+	return ret;
+}
+
 struct drm_dp_aux *dp_aux_get(struct device *dev, struct dp_catalog *catalog,
 			      bool is_edp)
 {
@@ -525,6 +539,17 @@ struct drm_dp_aux *dp_aux_get(struct device *dev, struct dp_catalog *catalog,
 	aux->catalog = catalog;
 	aux->retry_cnt = 0;
 
+	/*
+	 * Use the drm_dp_aux_init() to use the aux adapter
+	 * before registering AUX with the DRM device so that
+	 * msm eDP panel can be detected by generic_dep_panel_probe().
+	 */
+	aux->dp_aux.name = "dpu_dp_aux";
+	aux->dp_aux.dev = dev;
+	aux->dp_aux.transfer = dp_aux_transfer;
+	aux->dp_aux.wait_hpd_asserted = dp_wait_hpd_asserted;
+	drm_dp_aux_init(&aux->dp_aux);
+
 	return &aux->dp_aux;
 }
 
diff --git a/drivers/gpu/drm/msm/dp/dp_debug.c b/drivers/gpu/drm/msm/dp/dp_debug.c
index 3bba901afe33..6c281dc095b9 100644
--- a/drivers/gpu/drm/msm/dp/dp_debug.c
+++ b/drivers/gpu/drm/msm/dp/dp_debug.c
@@ -19,13 +19,9 @@
 #define DEBUG_NAME "msm_dp"
 
 struct dp_debug_private {
-	struct dentry *root;
-
 	struct dp_link *link;
 	struct dp_panel *panel;
 	struct drm_connector *connector;
-	struct device *dev;
-	struct drm_device *drm_dev;
 
 	struct dp_debug dp_debug;
 };
@@ -204,35 +200,33 @@ static const struct file_operations test_active_fops = {
 	.write = dp_test_active_write
 };
 
-static void dp_debug_init(struct dp_debug *dp_debug, struct drm_minor *minor)
+static void dp_debug_init(struct dp_debug *dp_debug, struct dentry *root, bool is_edp)
 {
-	char path[64];
 	struct dp_debug_private *debug = container_of(dp_debug,
 			struct dp_debug_private, dp_debug);
 
-	snprintf(path, sizeof(path), "msm_dp-%s", debug->connector->name);
-
-	debug->root = debugfs_create_dir(path, minor->debugfs_root);
-
-	debugfs_create_file("dp_debug", 0444, debug->root,
+	debugfs_create_file("dp_debug", 0444, root,
 			debug, &dp_debug_fops);
 
-	debugfs_create_file("msm_dp_test_active", 0444,
-			debug->root,
-			debug, &test_active_fops);
+	if (!is_edp) {
+		debugfs_create_file("msm_dp_test_active", 0444,
+				    root,
+				    debug, &test_active_fops);
 
-	debugfs_create_file("msm_dp_test_data", 0444,
-			debug->root,
-			debug, &dp_test_data_fops);
+		debugfs_create_file("msm_dp_test_data", 0444,
+				    root,
+				    debug, &dp_test_data_fops);
 
-	debugfs_create_file("msm_dp_test_type", 0444,
-			debug->root,
-			debug, &dp_test_type_fops);
+		debugfs_create_file("msm_dp_test_type", 0444,
+				    root,
+				    debug, &dp_test_type_fops);
+	}
 }
 
 struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel,
 		struct dp_link *link,
-		struct drm_connector *connector, struct drm_minor *minor)
+		struct drm_connector *connector,
+		struct dentry *root, bool is_edp)
 {
 	struct dp_debug_private *debug;
 	struct dp_debug *dp_debug;
@@ -253,46 +247,15 @@ struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel,
 	debug->dp_debug.debug_en = false;
 	debug->link = link;
 	debug->panel = panel;
-	debug->dev = dev;
-	debug->drm_dev = minor->dev;
-	debug->connector = connector;
 
 	dp_debug = &debug->dp_debug;
 	dp_debug->vdisplay = 0;
 	dp_debug->hdisplay = 0;
 	dp_debug->vrefresh = 0;
 
-	dp_debug_init(dp_debug, minor);
+	dp_debug_init(dp_debug, root, is_edp);
 
 	return dp_debug;
  error:
 	return ERR_PTR(rc);
 }
-
-static int dp_debug_deinit(struct dp_debug *dp_debug)
-{
-	struct dp_debug_private *debug;
-
-	if (!dp_debug)
-		return -EINVAL;
-
-	debug = container_of(dp_debug, struct dp_debug_private, dp_debug);
-
-	debugfs_remove_recursive(debug->root);
-
-	return 0;
-}
-
-void dp_debug_put(struct dp_debug *dp_debug)
-{
-	struct dp_debug_private *debug;
-
-	if (!dp_debug)
-		return;
-
-	debug = container_of(dp_debug, struct dp_debug_private, dp_debug);
-
-	dp_debug_deinit(dp_debug);
-
-	devm_kfree(debug->dev, debug);
-}
diff --git a/drivers/gpu/drm/msm/dp/dp_debug.h b/drivers/gpu/drm/msm/dp/dp_debug.h
index 124227873d58..9b3b2e702f65 100644
--- a/drivers/gpu/drm/msm/dp/dp_debug.h
+++ b/drivers/gpu/drm/msm/dp/dp_debug.h
@@ -34,7 +34,8 @@ struct dp_debug {
  * @panel: instance of panel module
  * @link: instance of link module
  * @connector: double pointer to display connector
- * @minor: pointer to drm minor number after device registration
+ * @root: connector's debugfs root
+ * @is_edp: set for eDP connectors / panels
  * return: pointer to allocated debug module data
  *
  * This function sets up the debug module and provides a way
@@ -43,31 +44,21 @@ struct dp_debug {
 struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel,
 		struct dp_link *link,
 		struct drm_connector *connector,
-		struct drm_minor *minor);
-
-/**
- * dp_debug_put()
- *
- * Cleans up dp_debug instance
- *
- * @dp_debug: instance of dp_debug
- */
-void dp_debug_put(struct dp_debug *dp_debug);
+		struct dentry *root,
+		bool is_edp);
 
 #else
 
 static inline
 struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel,
 		struct dp_link *link,
-		struct drm_connector *connector, struct drm_minor *minor)
+		struct drm_connector *connector,
+		struct dentry *root,
+		bool is_edp)
 {
 	return ERR_PTR(-EINVAL);
 }
 
-static inline void dp_debug_put(struct dp_debug *dp_debug)
-{
-}
-
 #endif /* defined(CONFIG_DEBUG_FS) */
 
 #endif /* _DP_DEBUG_H_ */
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 1b88fb52726f..d37d599aec27 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -49,13 +49,11 @@ enum {
 	ST_CONNECTED,
 	ST_DISCONNECT_PENDING,
 	ST_DISPLAY_OFF,
-	ST_SUSPENDED,
 };
 
 enum {
 	EV_NO_EVENT,
 	/* hpd events */
-	EV_HPD_INIT_SETUP,
 	EV_HPD_PLUG_INT,
 	EV_IRQ_HPD_INT,
 	EV_HPD_UNPLUG_INT,
@@ -170,6 +168,11 @@ static const struct msm_dp_desc sm8350_dp_descs[] = {
 	{}
 };
 
+static const struct msm_dp_desc sm8650_dp_descs[] = {
+	{ .io_start = 0x0af54000, .id = MSM_DP_CONTROLLER_0, .connector_type = DRM_MODE_CONNECTOR_DisplayPort },
+	{}
+};
+
 static const struct of_device_id dp_dt_match[] = {
 	{ .compatible = "qcom,sc7180-dp", .data = &sc7180_dp_descs },
 	{ .compatible = "qcom,sc7280-dp", .data = &sc7280_dp_descs },
@@ -180,6 +183,7 @@ static const struct of_device_id dp_dt_match[] = {
 	{ .compatible = "qcom,sc8280xp-edp", .data = &sc8280xp_edp_descs },
 	{ .compatible = "qcom,sdm845-dp", .data = &sc7180_dp_descs },
 	{ .compatible = "qcom,sm8350-dp", .data = &sm8350_dp_descs },
+	{ .compatible = "qcom,sm8650-dp", .data = &sm8650_dp_descs },
 	{}
 };
 
@@ -275,11 +279,6 @@ static int dp_display_bind(struct device *dev, struct device *master,
 	dp->dp_display.drm_dev = drm;
 	priv->dp[dp->id] = &dp->dp_display;
 
-	rc = dp->parser->parse(dp->parser);
-	if (rc) {
-		DRM_ERROR("device tree parsing failed\n");
-		goto end;
-	}
 
 
 	dp->drm_dev = drm;
@@ -290,11 +289,6 @@ static int dp_display_bind(struct device *dev, struct device *master,
 		goto end;
 	}
 
-	rc = dp_power_client_init(dp->power);
-	if (rc) {
-		DRM_ERROR("Power client create failed\n");
-		goto end;
-	}
 
 	rc = dp_register_audio_driver(dev, dp->audio);
 	if (rc) {
@@ -319,15 +313,10 @@ static void dp_display_unbind(struct device *dev, struct device *master,
 	struct dp_display_private *dp = dev_get_dp_display_private(dev);
 	struct msm_drm_private *priv = dev_get_drvdata(master);
 
-	/* disable all HPD interrupts */
-	if (dp->core_initialized)
-		dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false);
-
 	kthread_stop(dp->ev_tsk);
 
 	of_dp_aux_depopulate_bus(dp->aux);
 
-	dp_power_client_deinit(dp->power);
 	dp_unregister_audio_driver(dev, dp->audio);
 	dp_aux_unregister(dp->aux);
 	dp->drm_dev = NULL;
@@ -340,27 +329,10 @@ static const struct component_ops dp_display_comp_ops = {
 	.unbind = dp_display_unbind,
 };
 
-static void dp_display_send_hpd_event(struct msm_dp *dp_display)
-{
-	struct dp_display_private *dp;
-	struct drm_connector *connector;
-
-	dp = container_of(dp_display, struct dp_display_private, dp_display);
-
-	connector = dp->dp_display.connector;
-	drm_helper_hpd_irq_event(connector->dev);
-}
-
-
 static int dp_display_send_hpd_notification(struct dp_display_private *dp,
 					    bool hpd)
 {
-	if ((hpd && dp->dp_display.is_connected) ||
-			(!hpd && !dp->dp_display.is_connected)) {
-		drm_dbg_dp(dp->drm_dev, "HPD already %s\n",
-				(hpd ? "on" : "off"));
-		return 0;
-	}
+	struct drm_bridge *bridge = dp->dp_display.bridge;
 
 	/* reset video pattern flag on disconnect */
 	if (!hpd) {
@@ -372,11 +344,11 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp,
 							 dp->panel->downstream_ports);
 	}
 
-	dp->dp_display.is_connected = hpd;
+	dp->dp_display.link_ready = hpd;
 
 	drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n",
 			dp->dp_display.connector_type, hpd);
-	dp_display_send_hpd_event(&dp->dp_display);
+	drm_bridge_hpd_notify(bridge, dp->dp_display.link_ready);
 
 	return 0;
 }
@@ -575,6 +547,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
 {
 	u32 state;
 	int ret;
+	struct platform_device *pdev = dp->dp_display.pdev;
 
 	mutex_lock(&dp->event_mutex);
 
@@ -582,7 +555,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
 	drm_dbg_dp(dp->drm_dev, "Before, type=%d hpd_state=%d\n",
 			dp->dp_display.connector_type, state);
 
-	if (state == ST_DISPLAY_OFF || state == ST_SUSPENDED) {
+	if (state == ST_DISPLAY_OFF) {
 		mutex_unlock(&dp->event_mutex);
 		return 0;
 	}
@@ -599,7 +572,14 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
 		return 0;
 	}
 
-	ret = dp_display_usbpd_configure_cb(&dp->dp_display.pdev->dev);
+	ret = pm_runtime_resume_and_get(&pdev->dev);
+	if (ret) {
+		DRM_ERROR("failed to pm_runtime_resume\n");
+		mutex_unlock(&dp->event_mutex);
+		return ret;
+	}
+
+	ret = dp_display_usbpd_configure_cb(&pdev->dev);
 	if (ret) {	/* link train failed */
 		dp->hpd_state = ST_DISCONNECTED;
 	} else {
@@ -631,6 +611,7 @@ static void dp_display_handle_plugged_change(struct msm_dp *dp_display,
 static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
 {
 	u32 state;
+	struct platform_device *pdev = dp->dp_display.pdev;
 
 	mutex_lock(&dp->event_mutex);
 
@@ -681,6 +662,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
 			dp->dp_display.connector_type, state);
 
 	/* uevent will complete disconnection part */
+	pm_runtime_put_sync(&pdev->dev);
 	mutex_unlock(&dp->event_mutex);
 	return 0;
 }
@@ -696,7 +678,7 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data)
 	drm_dbg_dp(dp->drm_dev, "Before, type=%d hpd_state=%d\n",
 			dp->dp_display.connector_type, state);
 
-	if (state == ST_DISPLAY_OFF || state == ST_SUSPENDED) {
+	if (state == ST_DISPLAY_OFF) {
 		mutex_unlock(&dp->event_mutex);
 		return 0;
 	}
@@ -720,7 +702,6 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data)
 
 static void dp_display_deinit_sub_modules(struct dp_display_private *dp)
 {
-	dp_debug_put(dp->debug);
 	dp_audio_put(dp->audio);
 	dp_panel_put(dp->panel);
 	dp_aux_put(dp->aux);
@@ -918,7 +899,7 @@ int dp_display_set_plugged_cb(struct msm_dp *dp_display,
 
 	dp_display->plugged_cb = fn;
 	dp_display->codec_dev = codec_dev;
-	plugged = dp_display->is_connected;
+	plugged = dp_display->link_ready;
 	dp_display_handle_plugged_change(dp_display, plugged);
 
 	return 0;
@@ -1108,9 +1089,6 @@ static int hpd_event_thread(void *data)
 		spin_unlock_irqrestore(&dp_priv->event_lock, flag);
 
 		switch (todo->event_id) {
-		case EV_HPD_INIT_SETUP:
-			dp_display_host_init(dp_priv);
-			break;
 		case EV_HPD_PLUG_INT:
 			dp_hpd_plug_handle(dp_priv, todo->data);
 			break;
@@ -1189,27 +1167,21 @@ static irqreturn_t dp_display_irq_handler(int irq, void *dev_id)
 	return ret;
 }
 
-int dp_display_request_irq(struct msm_dp *dp_display)
+static int dp_display_request_irq(struct dp_display_private *dp)
 {
 	int rc = 0;
-	struct dp_display_private *dp;
-
-	if (!dp_display) {
-		DRM_ERROR("invalid input\n");
-		return -EINVAL;
-	}
+	struct platform_device *pdev = dp->dp_display.pdev;
 
-	dp = container_of(dp_display, struct dp_display_private, dp_display);
-
-	dp->irq = irq_of_parse_and_map(dp->dp_display.pdev->dev.of_node, 0);
-	if (!dp->irq) {
+	dp->irq = platform_get_irq(pdev, 0);
+	if (dp->irq < 0) {
 		DRM_ERROR("failed to get irq\n");
-		return -EINVAL;
+		return dp->irq;
 	}
 
-	rc = devm_request_irq(dp_display->drm_dev->dev, dp->irq,
-			dp_display_irq_handler,
-			IRQF_TRIGGER_HIGH, "dp_display_isr", dp);
+	rc = devm_request_irq(&pdev->dev, dp->irq, dp_display_irq_handler,
+			      IRQF_TRIGGER_HIGH|IRQF_NO_AUTOEN,
+			      "dp_display_isr", dp);
+
 	if (rc < 0) {
 		DRM_ERROR("failed to request IRQ%u: %d\n",
 				dp->irq, rc);
@@ -1238,6 +1210,29 @@ static const struct msm_dp_desc *dp_display_get_desc(struct platform_device *pde
 	return NULL;
 }
 
+static int dp_display_get_next_bridge(struct msm_dp *dp);
+
+static int dp_display_probe_tail(struct device *dev)
+{
+	struct msm_dp *dp = dev_get_drvdata(dev);
+	int ret;
+
+	ret = dp_display_get_next_bridge(dp);
+	if (ret)
+		return ret;
+
+	ret = component_add(dev, &dp_display_comp_ops);
+	if (ret)
+		DRM_ERROR("component add failed, rc=%d\n", ret);
+
+	return ret;
+}
+
+static int dp_auxbus_done_probe(struct drm_dp_aux *aux)
+{
+	return dp_display_probe_tail(aux->dev);
+}
+
 static int dp_display_probe(struct platform_device *pdev)
 {
 	int rc = 0;
@@ -1271,6 +1266,18 @@ static int dp_display_probe(struct platform_device *pdev)
 		return -EPROBE_DEFER;
 	}
 
+	rc = dp->parser->parse(dp->parser);
+	if (rc) {
+		DRM_ERROR("device tree parsing failed\n");
+		goto err;
+	}
+
+	rc = dp_power_client_init(dp->power);
+	if (rc) {
+		DRM_ERROR("Power client create failed\n");
+		goto err;
+	}
+
 	/* setup event q */
 	mutex_init(&dp->event_mutex);
 	init_waitqueue_head(&dp->event_q);
@@ -1283,13 +1290,31 @@ static int dp_display_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, &dp->dp_display);
 
-	rc = component_add(&pdev->dev, &dp_display_comp_ops);
-	if (rc) {
-		DRM_ERROR("component add failed, rc=%d\n", rc);
-		dp_display_deinit_sub_modules(dp);
+	rc = devm_pm_runtime_enable(&pdev->dev);
+	if (rc)
+		goto err;
+
+	rc = dp_display_request_irq(dp);
+	if (rc)
+		goto err;
+
+	if (dp->dp_display.is_edp) {
+		rc = devm_of_dp_aux_populate_bus(dp->aux, dp_auxbus_done_probe);
+		if (rc) {
+			DRM_ERROR("eDP auxbus population failed, rc=%d\n", rc);
+			goto err;
+		}
+	} else {
+		rc = dp_display_probe_tail(&pdev->dev);
+		if (rc)
+			goto err;
 	}
 
 	return rc;
+
+err:
+	dp_display_deinit_sub_modules(dp);
+	return rc;
 }
 
 static void dp_display_remove(struct platform_device *pdev)
@@ -1298,113 +1323,50 @@ static void dp_display_remove(struct platform_device *pdev)
 
 	component_del(&pdev->dev, &dp_display_comp_ops);
 	dp_display_deinit_sub_modules(dp);
-
 	platform_set_drvdata(pdev, NULL);
 }
 
-static int dp_pm_resume(struct device *dev)
+static int dp_pm_runtime_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct msm_dp *dp_display = platform_get_drvdata(pdev);
-	struct dp_display_private *dp;
-	int sink_count = 0;
-
-	dp = container_of(dp_display, struct dp_display_private, dp_display);
-
-	mutex_lock(&dp->event_mutex);
-
-	drm_dbg_dp(dp->drm_dev,
-		"Before, type=%d core_inited=%d phy_inited=%d power_on=%d\n",
-		dp->dp_display.connector_type, dp->core_initialized,
-		dp->phy_initialized, dp_display->power_on);
-
-	/* start from disconnected state */
-	dp->hpd_state = ST_DISCONNECTED;
-
-	/* turn on dp ctrl/phy */
-	dp_display_host_init(dp);
-
-	if (dp_display->is_edp)
-		dp_catalog_ctrl_hpd_enable(dp->catalog);
+	struct dp_display_private *dp = dev_get_dp_display_private(dev);
 
-	if (dp_catalog_link_is_connected(dp->catalog)) {
-		/*
-		 * set sink to normal operation mode -- D0
-		 * before dpcd read
-		 */
-		dp_display_host_phy_init(dp);
-		dp_link_psm_config(dp->link, &dp->panel->link_info, false);
-		sink_count = drm_dp_read_sink_count(dp->aux);
-		if (sink_count < 0)
-			sink_count = 0;
+	disable_irq(dp->irq);
 
+	if (dp->dp_display.is_edp) {
 		dp_display_host_phy_exit(dp);
+		dp_catalog_ctrl_hpd_disable(dp->catalog);
 	}
-
-	dp->link->sink_count = sink_count;
-	/*
-	 * can not declared display is connected unless
-	 * HDMI cable is plugged in and sink_count of
-	 * dongle become 1
-	 * also only signal audio when disconnected
-	 */
-	if (dp->link->sink_count) {
-		dp->dp_display.is_connected = true;
-	} else {
-		dp->dp_display.is_connected = false;
-		dp_display_handle_plugged_change(dp_display, false);
-	}
-
-	drm_dbg_dp(dp->drm_dev,
-		"After, type=%d sink=%d conn=%d core_init=%d phy_init=%d power=%d\n",
-		dp->dp_display.connector_type, dp->link->sink_count,
-		dp->dp_display.is_connected, dp->core_initialized,
-		dp->phy_initialized, dp_display->power_on);
-
-	mutex_unlock(&dp->event_mutex);
+	dp_display_host_deinit(dp);
 
 	return 0;
 }
 
-static int dp_pm_suspend(struct device *dev)
+static int dp_pm_runtime_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct msm_dp *dp_display = platform_get_drvdata(pdev);
-	struct dp_display_private *dp;
-
-	dp = container_of(dp_display, struct dp_display_private, dp_display);
-
-	mutex_lock(&dp->event_mutex);
-
-	drm_dbg_dp(dp->drm_dev,
-		"Before, type=%d core_inited=%d  phy_inited=%d power_on=%d\n",
-		dp->dp_display.connector_type, dp->core_initialized,
-		dp->phy_initialized, dp_display->power_on);
-
-	/* mainlink enabled */
-	if (dp_power_clk_status(dp->power, DP_CTRL_PM))
-		dp_ctrl_off_link_stream(dp->ctrl);
-
-	dp_display_host_phy_exit(dp);
-
-	/* host_init will be called at pm_resume */
-	dp_display_host_deinit(dp);
-
-	dp->hpd_state = ST_SUSPENDED;
-
-	drm_dbg_dp(dp->drm_dev,
-		"After, type=%d core_inited=%d phy_inited=%d power_on=%d\n",
-		dp->dp_display.connector_type, dp->core_initialized,
-		dp->phy_initialized, dp_display->power_on);
+	struct dp_display_private *dp = dev_get_dp_display_private(dev);
 
-	mutex_unlock(&dp->event_mutex);
+	/*
+	 * for eDP, host cotroller, HPD block and PHY are enabled here
+	 * but with HPD irq disabled
+	 *
+	 * for DP, only host controller is enabled here.
+	 * HPD block is enabled at dp_bridge_hpd_enable()
+	 * PHY will be enabled at plugin handler later
+	 */
+	dp_display_host_init(dp);
+	if (dp->dp_display.is_edp) {
+		dp_catalog_ctrl_hpd_enable(dp->catalog);
+		dp_display_host_phy_init(dp);
+	}
 
+	enable_irq(dp->irq);
 	return 0;
 }
 
 static const struct dev_pm_ops dp_pm_ops = {
-	.suspend = dp_pm_suspend,
-	.resume =  dp_pm_resume,
+	SET_RUNTIME_PM_OPS(dp_pm_runtime_suspend, dp_pm_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
 };
 
 static struct platform_driver dp_display_driver = {
@@ -1434,19 +1396,6 @@ void __exit msm_dp_unregister(void)
 	platform_driver_unregister(&dp_display_driver);
 }
 
-void msm_dp_irq_postinstall(struct msm_dp *dp_display)
-{
-	struct dp_display_private *dp;
-
-	if (!dp_display)
-		return;
-
-	dp = container_of(dp_display, struct dp_display_private, dp_display);
-
-	if (!dp_display->is_edp)
-		dp_add_event(dp, EV_HPD_INIT_SETUP, 0, 0);
-}
-
 bool msm_dp_wide_bus_available(const struct msm_dp *dp_display)
 {
 	struct dp_display_private *dp;
@@ -1456,7 +1405,7 @@ bool msm_dp_wide_bus_available(const struct msm_dp *dp_display)
 	return dp->wide_bus_en;
 }
 
-void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor)
+void dp_display_debugfs_init(struct msm_dp *dp_display, struct dentry *root, bool is_edp)
 {
 	struct dp_display_private *dp;
 	struct device *dev;
@@ -1467,7 +1416,7 @@ void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor)
 
 	dp->debug = dp_debug_get(dev, dp->panel,
 					dp->link, dp->dp_display.connector,
-					minor);
+					root, is_edp);
 	if (IS_ERR(dp->debug)) {
 		rc = PTR_ERR(dp->debug);
 		DRM_ERROR("failed to initialize debug, rc = %d\n", rc);
@@ -1479,33 +1428,8 @@ static int dp_display_get_next_bridge(struct msm_dp *dp)
 {
 	int rc;
 	struct dp_display_private *dp_priv;
-	struct device_node *aux_bus;
-	struct device *dev;
 
 	dp_priv = container_of(dp, struct dp_display_private, dp_display);
-	dev = &dp_priv->dp_display.pdev->dev;
-	aux_bus = of_get_child_by_name(dev->of_node, "aux-bus");
-
-	if (aux_bus && dp->is_edp) {
-		dp_display_host_init(dp_priv);
-		dp_catalog_ctrl_hpd_enable(dp_priv->catalog);
-		dp_display_host_phy_init(dp_priv);
-
-		/*
-		 * The code below assumes that the panel will finish probing
-		 * by the time devm_of_dp_aux_populate_ep_devices() returns.
-		 * This isn't a great assumption since it will fail if the
-		 * panel driver is probed asynchronously but is the best we
-		 * can do without a bigger driver reorganization.
-		 */
-		rc = of_dp_aux_populate_bus(dp_priv->aux, NULL);
-		of_node_put(aux_bus);
-		if (rc)
-			goto error;
-	} else if (dp->is_edp) {
-		DRM_ERROR("eDP aux_bus not found\n");
-		return -ENODEV;
-	}
 
 	/*
 	 * External bridges are mandatory for eDP interfaces: one has to
@@ -1514,21 +1438,13 @@ static int dp_display_get_next_bridge(struct msm_dp *dp)
 	 * For DisplayPort interfaces external bridges are optional, so
 	 * silently ignore an error if one is not present (-ENODEV).
 	 */
-	rc = devm_dp_parser_find_next_bridge(dp->drm_dev->dev, dp_priv->parser);
+	rc = devm_dp_parser_find_next_bridge(&dp->pdev->dev, dp_priv->parser);
 	if (!dp->is_edp && rc == -ENODEV)
 		return 0;
 
-	if (!rc) {
+	if (!rc)
 		dp->next_bridge = dp_priv->parser->next_bridge;
-		return 0;
-	}
 
-error:
-	if (dp->is_edp) {
-		of_dp_aux_depopulate_bus(dp_priv->aux);
-		dp_display_host_phy_exit(dp_priv);
-		dp_display_host_deinit(dp_priv);
-	}
 	return rc;
 }
 
@@ -1542,16 +1458,6 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev,
 
 	dp_priv = container_of(dp_display, struct dp_display_private, dp_display);
 
-	ret = dp_display_request_irq(dp_display);
-	if (ret) {
-		DRM_ERROR("request_irq failed, ret=%d\n", ret);
-		return ret;
-	}
-
-	ret = dp_display_get_next_bridge(dp_display);
-	if (ret)
-		return ret;
-
 	ret = dp_bridge_init(dp_display, dev, encoder);
 	if (ret) {
 		DRM_DEV_ERROR(dev->dev,
@@ -1593,6 +1499,11 @@ void dp_bridge_atomic_enable(struct drm_bridge *drm_bridge,
 		dp_hpd_plug_handle(dp_display, 0);
 
 	mutex_lock(&dp_display->event_mutex);
+	if (pm_runtime_resume_and_get(&dp->pdev->dev)) {
+		DRM_ERROR("failed to pm_runtime_resume\n");
+		mutex_unlock(&dp_display->event_mutex);
+		return;
+	}
 
 	state = dp_display->hpd_state;
 	if (state != ST_DISPLAY_OFF && state != ST_MAINLINK_READY) {
@@ -1657,10 +1568,9 @@ void dp_bridge_atomic_post_disable(struct drm_bridge *drm_bridge,
 	mutex_lock(&dp_display->event_mutex);
 
 	state = dp_display->hpd_state;
-	if (state != ST_DISCONNECT_PENDING && state != ST_CONNECTED) {
-		mutex_unlock(&dp_display->event_mutex);
-		return;
-	}
+	if (state != ST_DISCONNECT_PENDING && state != ST_CONNECTED)
+		drm_dbg_dp(dp->drm_dev, "type=%d wrong hpd_state=%d\n",
+			   dp->connector_type, state);
 
 	dp_display_disable(dp_display);
 
@@ -1673,6 +1583,8 @@ void dp_bridge_atomic_post_disable(struct drm_bridge *drm_bridge,
 	}
 
 	drm_dbg_dp(dp->drm_dev, "type=%d Done\n", dp->connector_type);
+
+	pm_runtime_put_sync(&dp->pdev->dev);
 	mutex_unlock(&dp_display->event_mutex);
 }
 
@@ -1711,7 +1623,21 @@ void dp_bridge_hpd_enable(struct drm_bridge *bridge)
 	struct msm_dp *dp_display = dp_bridge->dp_display;
 	struct dp_display_private *dp = container_of(dp_display, struct dp_display_private, dp_display);
 
+	/*
+	 * this is for external DP with hpd irq enabled case,
+	 * step-1: dp_pm_runtime_resume() enable dp host only
+	 * step-2: enable hdp block and have hpd irq enabled here
+	 * step-3: waiting for plugin irq while phy is not initialized
+	 * step-4: DP PHY is initialized at plugin handler before link training
+	 *
+	 */
 	mutex_lock(&dp->event_mutex);
+	if (pm_runtime_resume_and_get(&dp_display->pdev->dev)) {
+		DRM_ERROR("failed to resume power\n");
+		mutex_unlock(&dp->event_mutex);
+		return;
+	}
+
 	dp_catalog_ctrl_hpd_enable(dp->catalog);
 
 	/* enable HDP interrupts */
@@ -1733,6 +1659,8 @@ void dp_bridge_hpd_disable(struct drm_bridge *bridge)
 	dp_catalog_ctrl_hpd_disable(dp->catalog);
 
 	dp_display->internal_hpd = false;
+
+	pm_runtime_put_sync(&dp_display->pdev->dev);
 	mutex_unlock(&dp->event_mutex);
 }
 
@@ -1747,13 +1675,8 @@ void dp_bridge_hpd_notify(struct drm_bridge *bridge,
 	if (dp_display->internal_hpd)
 		return;
 
-	if (!dp->core_initialized) {
-		drm_dbg_dp(dp->drm_dev, "not initialized\n");
-		return;
-	}
-
-	if (!dp_display->is_connected && status == connector_status_connected)
+	if (!dp_display->link_ready && status == connector_status_connected)
 		dp_add_event(dp, EV_HPD_PLUG_INT, 0, 0);
-	else if (dp_display->is_connected && status == connector_status_disconnected)
+	else if (dp_display->link_ready && status == connector_status_disconnected)
 		dp_add_event(dp, EV_HPD_UNPLUG_INT, 0, 0);
 }
diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h
index f66cdbc35785..102f3507d824 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.h
+++ b/drivers/gpu/drm/msm/dp/dp_display.h
@@ -17,7 +17,7 @@ struct msm_dp {
 	struct drm_bridge *bridge;
 	struct drm_connector *connector;
 	struct drm_bridge *next_bridge;
-	bool is_connected;
+	bool link_ready;
 	bool audio_enabled;
 	bool power_on;
 	unsigned int connector_type;
@@ -36,11 +36,11 @@ struct msm_dp {
 int dp_display_set_plugged_cb(struct msm_dp *dp_display,
 		hdmi_codec_plugged_cb fn, struct device *codec_dev);
 int dp_display_get_modes(struct msm_dp *dp_display);
-int dp_display_request_irq(struct msm_dp *dp_display);
 bool dp_display_check_video_test(struct msm_dp *dp_display);
 int dp_display_get_test_bpp(struct msm_dp *dp_display);
 void dp_display_signal_audio_start(struct msm_dp *dp_display);
 void dp_display_signal_audio_complete(struct msm_dp *dp_display);
 void dp_display_set_psr(struct msm_dp *dp, bool enter);
+void dp_display_debugfs_init(struct msm_dp *dp_display, struct dentry *dentry, bool is_edp);
 
 #endif /* _DP_DISPLAY_H_ */
diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c
index e3bdd7dd4cdc..46e6889037e8 100644
--- a/drivers/gpu/drm/msm/dp/dp_drm.c
+++ b/drivers/gpu/drm/msm/dp/dp_drm.c
@@ -24,10 +24,10 @@ static enum drm_connector_status dp_bridge_detect(struct drm_bridge *bridge)
 
 	dp = to_dp_bridge(bridge)->dp_display;
 
-	drm_dbg_dp(dp->drm_dev, "is_connected = %s\n",
-		(dp->is_connected) ? "true" : "false");
+	drm_dbg_dp(dp->drm_dev, "link_ready = %s\n",
+		(dp->link_ready) ? "true" : "false");
 
-	return (dp->is_connected) ? connector_status_connected :
+	return (dp->link_ready) ? connector_status_connected :
 					connector_status_disconnected;
 }
 
@@ -40,8 +40,8 @@ static int dp_bridge_atomic_check(struct drm_bridge *bridge,
 
 	dp = to_dp_bridge(bridge)->dp_display;
 
-	drm_dbg_dp(dp->drm_dev, "is_connected = %s\n",
-		(dp->is_connected) ? "true" : "false");
+	drm_dbg_dp(dp->drm_dev, "link_ready = %s\n",
+		(dp->link_ready) ? "true" : "false");
 
 	/*
 	 * There is no protection in the DRM framework to check if the display
@@ -55,7 +55,7 @@ static int dp_bridge_atomic_check(struct drm_bridge *bridge,
 	 * After that this piece of code can be removed.
 	 */
 	if (bridge->ops & DRM_BRIDGE_OP_HPD)
-		return (dp->is_connected) ? 0 : -ENOTCONN;
+		return (dp->link_ready) ? 0 : -ENOTCONN;
 
 	return 0;
 }
@@ -78,7 +78,7 @@ static int dp_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector *
 	dp = to_dp_bridge(bridge)->dp_display;
 
 	/* pluggable case assumes EDID is read when HPD */
-	if (dp->is_connected) {
+	if (dp->link_ready) {
 		rc = dp_display_get_modes(dp);
 		if (rc <= 0) {
 			DRM_ERROR("failed to get DP sink modes, rc=%d\n", rc);
@@ -90,6 +90,13 @@ static int dp_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector *
 	return rc;
 }
 
+static void dp_bridge_debugfs_init(struct drm_bridge *bridge, struct dentry *root)
+{
+	struct msm_dp *dp = to_dp_bridge(bridge)->dp_display;
+
+	dp_display_debugfs_init(dp, root, false);
+}
+
 static const struct drm_bridge_funcs dp_bridge_ops = {
 	.atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
 	.atomic_destroy_state   = drm_atomic_helper_bridge_destroy_state,
@@ -105,6 +112,7 @@ static const struct drm_bridge_funcs dp_bridge_ops = {
 	.hpd_enable   = dp_bridge_hpd_enable,
 	.hpd_disable  = dp_bridge_hpd_disable,
 	.hpd_notify   = dp_bridge_hpd_notify,
+	.debugfs_init = dp_bridge_debugfs_init,
 };
 
 static int edp_bridge_atomic_check(struct drm_bridge *drm_bridge,
@@ -260,6 +268,13 @@ static enum drm_mode_status edp_bridge_mode_valid(struct drm_bridge *bridge,
 	return MODE_OK;
 }
 
+static void edp_bridge_debugfs_init(struct drm_bridge *bridge, struct dentry *root)
+{
+	struct msm_dp *dp = to_dp_bridge(bridge)->dp_display;
+
+	dp_display_debugfs_init(dp, root, true);
+}
+
 static const struct drm_bridge_funcs edp_bridge_ops = {
 	.atomic_enable = edp_bridge_atomic_enable,
 	.atomic_disable = edp_bridge_atomic_disable,
@@ -270,6 +285,7 @@ static const struct drm_bridge_funcs edp_bridge_ops = {
 	.atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_bridge_destroy_state,
 	.atomic_check = edp_bridge_atomic_check,
+	.debugfs_init = edp_bridge_debugfs_init,
 };
 
 int dp_bridge_init(struct msm_dp *dp_display, struct drm_device *dev,
diff --git a/drivers/gpu/drm/msm/dp/dp_power.c b/drivers/gpu/drm/msm/dp/dp_power.c
index 5cb84ca40e9e..c4843dd69f47 100644
--- a/drivers/gpu/drm/msm/dp/dp_power.c
+++ b/drivers/gpu/drm/msm/dp/dp_power.c
@@ -152,45 +152,17 @@ int dp_power_client_init(struct dp_power *dp_power)
 
 	power = container_of(dp_power, struct dp_power_private, dp_power);
 
-	pm_runtime_enable(power->dev);
-
 	return dp_power_clk_init(power);
 }
 
-void dp_power_client_deinit(struct dp_power *dp_power)
-{
-	struct dp_power_private *power;
-
-	power = container_of(dp_power, struct dp_power_private, dp_power);
-
-	pm_runtime_disable(power->dev);
-}
-
 int dp_power_init(struct dp_power *dp_power)
 {
-	int rc = 0;
-	struct dp_power_private *power = NULL;
-
-	power = container_of(dp_power, struct dp_power_private, dp_power);
-
-	pm_runtime_get_sync(power->dev);
-
-	rc = dp_power_clk_enable(dp_power, DP_CORE_PM, true);
-	if (rc)
-		pm_runtime_put_sync(power->dev);
-
-	return rc;
+	return dp_power_clk_enable(dp_power, DP_CORE_PM, true);
 }
 
 int dp_power_deinit(struct dp_power *dp_power)
 {
-	struct dp_power_private *power;
-
-	power = container_of(dp_power, struct dp_power_private, dp_power);
-
-	dp_power_clk_enable(dp_power, DP_CORE_PM, false);
-	pm_runtime_put_sync(power->dev);
-	return 0;
+	return dp_power_clk_enable(dp_power, DP_CORE_PM, false);
 }
 
 struct dp_power *dp_power_get(struct device *dev, struct dp_parser *parser)
diff --git a/drivers/gpu/drm/msm/dp/dp_power.h b/drivers/gpu/drm/msm/dp/dp_power.h
index a3dec200785e..55ada51edb57 100644
--- a/drivers/gpu/drm/msm/dp/dp_power.h
+++ b/drivers/gpu/drm/msm/dp/dp_power.h
@@ -81,17 +81,6 @@ int dp_power_clk_enable(struct dp_power *power, enum dp_pm_type pm_type,
 int dp_power_client_init(struct dp_power *power);
 
 /**
- * dp_power_clinet_deinit() - de-initialize clock and regulator modules
- *
- * @power: instance of power module
- * return: 0 for success, error for failure.
- *
- * This API will de-initialize the DisplayPort's clocks and regulator
- * modules.
- */
-void dp_power_client_deinit(struct dp_power *power);
-
-/**
  * dp_power_get() - configure and get the DisplayPort power module data
  *
  * @parser: instance of parser module
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
index 1f98ff74ceb0..10ba7d153d1c 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
@@ -190,6 +190,21 @@ static const struct msm_dsi_config sm8550_dsi_cfg = {
 	},
 };
 
+static const struct regulator_bulk_data sm8650_dsi_regulators[] = {
+	{ .supply = "vdda", .init_load_uA = 16600 },	/* 1.2 V */
+};
+
+static const struct msm_dsi_config sm8650_dsi_cfg = {
+	.io_offset = DSI_6G_REG_SHIFT,
+	.regulator_data = sm8650_dsi_regulators,
+	.num_regulators = ARRAY_SIZE(sm8650_dsi_regulators),
+	.bus_clk_names = dsi_v2_4_clk_names,
+	.num_bus_clks = ARRAY_SIZE(dsi_v2_4_clk_names),
+	.io_start = {
+		{ 0xae94000, 0xae96000 },
+	},
+};
+
 static const struct regulator_bulk_data sc7280_dsi_regulators[] = {
 	{ .supply = "vdda", .init_load_uA = 8350 },	/* 1.2 V */
 	{ .supply = "refgen" },
@@ -281,6 +296,8 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = {
 		&sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops},
 	{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_7_0,
 		&sm8550_dsi_cfg, &msm_dsi_6g_v2_host_ops},
+	{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_8_0,
+		&sm8650_dsi_cfg, &msm_dsi_6g_v2_host_ops},
 };
 
 const struct msm_dsi_cfg_handler *msm_dsi_cfg_get(u32 major, u32 minor)
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
index 43f0dd74edb6..4c9b4b37681b 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
@@ -28,6 +28,7 @@
 #define MSM_DSI_6G_VER_MINOR_V2_5_0	0x20050000
 #define MSM_DSI_6G_VER_MINOR_V2_6_0	0x20060000
 #define MSM_DSI_6G_VER_MINOR_V2_7_0	0x20070000
+#define MSM_DSI_6G_VER_MINOR_V2_8_0	0x20080000
 
 #define MSM_DSI_V2_VER_MINOR_8064	0x0
 
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index 05621e5e7d63..24a347fe2998 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -516,7 +516,9 @@ static int dsi_phy_enable_resource(struct msm_dsi_phy *phy)
 	struct device *dev = &phy->pdev->dev;
 	int ret;
 
-	pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
+	if (ret)
+		return ret;
 
 	ret = clk_prepare_enable(phy->ahb_clk);
 	if (ret) {
@@ -585,6 +587,8 @@ static const struct of_device_id dsi_phy_dt_match[] = {
 	  .data = &dsi_phy_5nm_8450_cfgs },
 	{ .compatible = "qcom,sm8550-dsi-phy-4nm",
 	  .data = &dsi_phy_4nm_8550_cfgs },
+	{ .compatible = "qcom,sm8650-dsi-phy-4nm",
+	  .data = &dsi_phy_4nm_8650_cfgs },
 #endif
 	{}
 };
@@ -689,6 +693,10 @@ static int dsi_phy_driver_probe(struct platform_device *pdev)
 		return dev_err_probe(dev, PTR_ERR(phy->ahb_clk),
 				     "Unable to get ahb clk\n");
 
+	ret = devm_pm_runtime_enable(&pdev->dev);
+	if (ret)
+		return ret;
+
 	/* PLL init will call into clk_register which requires
 	 * register access, so we need to enable power and ahb clock.
 	 */
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
index 8b640d174785..e4275d3ad581 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
@@ -62,6 +62,7 @@ extern const struct msm_dsi_phy_cfg dsi_phy_7nm_7280_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_5nm_8350_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_5nm_8450_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_4nm_8550_cfgs;
+extern const struct msm_dsi_phy_cfg dsi_phy_4nm_8650_cfgs;
 
 struct msm_dsi_dphy_timing {
 	u32 clk_zero;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
index 89a6344bc865..82d015aa2d63 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
@@ -1121,6 +1121,10 @@ static const struct regulator_bulk_data dsi_phy_7nm_37750uA_regulators[] = {
 	{ .supply = "vdds", .init_load_uA = 37550 },
 };
 
+static const struct regulator_bulk_data dsi_phy_7nm_98000uA_regulators[] = {
+	{ .supply = "vdds", .init_load_uA = 98000 },
+};
+
 static const struct regulator_bulk_data dsi_phy_7nm_97800uA_regulators[] = {
 	{ .supply = "vdds", .init_load_uA = 97800 },
 };
@@ -1281,3 +1285,26 @@ const struct msm_dsi_phy_cfg dsi_phy_4nm_8550_cfgs = {
 	.num_dsi_phy = 2,
 	.quirks = DSI_PHY_7NM_QUIRK_V5_2,
 };
+
+const struct msm_dsi_phy_cfg dsi_phy_4nm_8650_cfgs = {
+	.has_phy_lane = true,
+	.regulator_data = dsi_phy_7nm_98000uA_regulators,
+	.num_regulators = ARRAY_SIZE(dsi_phy_7nm_98000uA_regulators),
+	.ops = {
+		.enable = dsi_7nm_phy_enable,
+		.disable = dsi_7nm_phy_disable,
+		.pll_init = dsi_pll_7nm_init,
+		.save_pll_state = dsi_7nm_pll_save_state,
+		.restore_pll_state = dsi_7nm_pll_restore_state,
+		.set_continuous_clock = dsi_7nm_set_continuous_clock,
+	},
+	.min_pll_rate = 600000000UL,
+#ifdef CONFIG_64BIT
+	.max_pll_rate = 5000000000UL,
+#else
+	.max_pll_rate = ULONG_MAX,
+#endif
+	.io_start = { 0xae95000, 0xae97000 },
+	.num_dsi_phy = 2,
+	.quirks = DSI_PHY_7NM_QUIRK_V5_2,
+};
diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
index 04d304eed223..4494f6d1c7cb 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -304,36 +304,21 @@ int msm_debugfs_late_init(struct drm_device *dev)
 	return ret;
 }
 
-void msm_debugfs_init(struct drm_minor *minor)
+static void msm_debugfs_gpu_init(struct drm_minor *minor)
 {
 	struct drm_device *dev = minor->dev;
 	struct msm_drm_private *priv = dev->dev_private;
 	struct dentry *gpu_devfreq;
 
-	drm_debugfs_create_files(msm_debugfs_list,
-				 ARRAY_SIZE(msm_debugfs_list),
-				 minor->debugfs_root, minor);
-
 	debugfs_create_file("gpu", S_IRUSR, minor->debugfs_root,
 		dev, &msm_gpu_fops);
 
-	if (priv->kms) {
-		drm_debugfs_create_files(msm_kms_debugfs_list,
-					 ARRAY_SIZE(msm_kms_debugfs_list),
-					 minor->debugfs_root, minor);
-		debugfs_create_file("kms", S_IRUSR, minor->debugfs_root,
-				    dev, &msm_kms_fops);
-	}
-
 	debugfs_create_u32("hangcheck_period_ms", 0600, minor->debugfs_root,
 		&priv->hangcheck_period);
 
 	debugfs_create_bool("disable_err_irq", 0600, minor->debugfs_root,
 		&priv->disable_err_irq);
 
-	debugfs_create_file("shrink", S_IRWXU, minor->debugfs_root,
-		dev, &shrink_fops);
-
 	gpu_devfreq = debugfs_create_dir("devfreq", minor->debugfs_root);
 
 	debugfs_create_bool("idle_clamp",0600, gpu_devfreq,
@@ -344,6 +329,30 @@ void msm_debugfs_init(struct drm_minor *minor)
 
 	debugfs_create_u32("downdifferential",0600, gpu_devfreq,
 			   &priv->gpu_devfreq_config.downdifferential);
+}
+
+void msm_debugfs_init(struct drm_minor *minor)
+{
+	struct drm_device *dev = minor->dev;
+	struct msm_drm_private *priv = dev->dev_private;
+
+	drm_debugfs_create_files(msm_debugfs_list,
+				 ARRAY_SIZE(msm_debugfs_list),
+				 minor->debugfs_root, minor);
+
+	if (priv->gpu_pdev)
+		msm_debugfs_gpu_init(minor);
+
+	if (priv->kms) {
+		drm_debugfs_create_files(msm_kms_debugfs_list,
+					 ARRAY_SIZE(msm_kms_debugfs_list),
+					 minor->debugfs_root, minor);
+		debugfs_create_file("kms", S_IRUSR, minor->debugfs_root,
+				    dev, &msm_kms_fops);
+	}
+
+	debugfs_create_file("shrink", S_IRWXU, minor->debugfs_root,
+		dev, &shrink_fops);
 
 	if (priv->kms && priv->kms->funcs->debugfs_init)
 		priv->kms->funcs->debugfs_init(priv->kms, minor);
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 3f217b578293..50b65ffc24b1 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -37,9 +37,10 @@
  * - 1.9.0 - Add MSM_SUBMIT_FENCE_SN_IN
  * - 1.10.0 - Add MSM_SUBMIT_BO_NO_IMPLICIT
  * - 1.11.0 - Add wait boost (MSM_WAIT_FENCE_BOOST, MSM_PREP_BOOST)
+ * - 1.12.0 - Add MSM_INFO_SET_METADATA and MSM_INFO_GET_METADATA
  */
 #define MSM_VERSION_MAJOR	1
-#define MSM_VERSION_MINOR	10
+#define MSM_VERSION_MINOR	12
 #define MSM_VERSION_PATCHLEVEL	0
 
 static void msm_deinit_vram(struct drm_device *ddev);
@@ -544,6 +545,85 @@ static int msm_ioctl_gem_info_set_iova(struct drm_device *dev,
 	return msm_gem_set_iova(obj, ctx->aspace, iova);
 }
 
+static int msm_ioctl_gem_info_set_metadata(struct drm_gem_object *obj,
+					   __user void *metadata,
+					   u32 metadata_size)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	void *buf;
+	int ret;
+
+	/* Impose a moderate upper bound on metadata size: */
+	if (metadata_size > 128) {
+		return -EOVERFLOW;
+	}
+
+	/* Use a temporary buf to keep copy_from_user() outside of gem obj lock: */
+	buf = memdup_user(metadata, metadata_size);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	ret = msm_gem_lock_interruptible(obj);
+	if (ret)
+		goto out;
+
+	msm_obj->metadata =
+		krealloc(msm_obj->metadata, metadata_size, GFP_KERNEL);
+	msm_obj->metadata_size = metadata_size;
+	memcpy(msm_obj->metadata, buf, metadata_size);
+
+	msm_gem_unlock(obj);
+
+out:
+	kfree(buf);
+
+	return ret;
+}
+
+static int msm_ioctl_gem_info_get_metadata(struct drm_gem_object *obj,
+					   __user void *metadata,
+					   u32 *metadata_size)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	void *buf;
+	int ret, len;
+
+	if (!metadata) {
+		/*
+		 * Querying the size is inherently racey, but
+		 * EXT_external_objects expects the app to confirm
+		 * via device and driver UUIDs that the exporter and
+		 * importer versions match.  All we can do from the
+		 * kernel side is check the length under obj lock
+		 * when userspace tries to retrieve the metadata
+		 */
+		*metadata_size = msm_obj->metadata_size;
+		return 0;
+	}
+
+	ret = msm_gem_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
+	/* Avoid copy_to_user() under gem obj lock: */
+	len = msm_obj->metadata_size;
+	buf = kmemdup(msm_obj->metadata, len, GFP_KERNEL);
+
+	msm_gem_unlock(obj);
+
+	if (*metadata_size < len) {
+		ret = -ETOOSMALL;
+	} else if (copy_to_user(metadata, buf, len)) {
+		ret = -EFAULT;
+	} else {
+		*metadata_size = len;
+	}
+
+	kfree(buf);
+
+	return 0;
+}
+
 static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
 		struct drm_file *file)
 {
@@ -566,6 +646,8 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
 		break;
 	case MSM_INFO_SET_NAME:
 	case MSM_INFO_GET_NAME:
+	case MSM_INFO_SET_METADATA:
+	case MSM_INFO_GET_METADATA:
 		break;
 	default:
 		return -EINVAL;
@@ -618,7 +700,7 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
 		break;
 	case MSM_INFO_GET_NAME:
 		if (args->value && (args->len < strlen(msm_obj->name))) {
-			ret = -EINVAL;
+			ret = -ETOOSMALL;
 			break;
 		}
 		args->len = strlen(msm_obj->name);
@@ -628,6 +710,14 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
 				ret = -EFAULT;
 		}
 		break;
+	case MSM_INFO_SET_METADATA:
+		ret = msm_ioctl_gem_info_set_metadata(
+			obj, u64_to_user_ptr(args->value), args->len);
+		break;
+	case MSM_INFO_GET_METADATA:
+		ret = msm_ioctl_gem_info_get_metadata(
+			obj, u64_to_user_ptr(args->value), &args->len);
+		break;
 	}
 
 	drm_gem_object_put(obj);
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index cd5bf658df66..16a7cbc0b7dd 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -78,12 +78,10 @@ enum msm_dsi_controller {
  * enum msm_event_wait - type of HW events to wait for
  * @MSM_ENC_COMMIT_DONE - wait for the driver to flush the registers to HW
  * @MSM_ENC_TX_COMPLETE - wait for the HW to transfer the frame to panel
- * @MSM_ENC_VBLANK - wait for the HW VBLANK event (for driver-internal waiters)
  */
 enum msm_event_wait {
 	MSM_ENC_COMMIT_DONE = 0,
 	MSM_ENC_TX_COMPLETE,
-	MSM_ENC_VBLANK,
 };
 
 /**
@@ -92,12 +90,14 @@ enum msm_event_wait {
  * @num_intf:     number of interfaces the panel is mounted on
  * @num_dspp:     number of dspp blocks used
  * @num_dsc:      number of Display Stream Compression (DSC) blocks used
+ * @needs_cdm:    indicates whether cdm block is needed for this display topology
  */
 struct msm_display_topology {
 	u32 num_lm;
 	u32 num_intf;
 	u32 num_dspp;
 	u32 num_dsc;
+	bool needs_cdm;
 };
 
 /* Commit/Event thread specific structure */
@@ -386,10 +386,8 @@ int __init msm_dp_register(void);
 void __exit msm_dp_unregister(void);
 int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev,
 			 struct drm_encoder *encoder);
-void msm_dp_irq_postinstall(struct msm_dp *dp_display);
 void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp_display);
 
-void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor);
 bool msm_dp_wide_bus_available(const struct msm_dp *dp_display);
 
 #else
@@ -407,19 +405,10 @@ static inline int msm_dp_modeset_init(struct msm_dp *dp_display,
 	return -EINVAL;
 }
 
-static inline void msm_dp_irq_postinstall(struct msm_dp *dp_display)
-{
-}
-
 static inline void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp_display)
 {
 }
 
-static inline void msm_dp_debugfs_init(struct msm_dp *dp_display,
-		struct drm_minor *minor)
-{
-}
-
 static inline bool msm_dp_wide_bus_available(const struct msm_dp *dp_display)
 {
 	return false;
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index db1e748daa75..175ee4ab8a6f 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -226,9 +226,9 @@ static struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj,
 
 	msm_gem_assert_locked(obj);
 
-	if (GEM_WARN_ON(msm_obj->madv > madv)) {
-		DRM_DEV_ERROR(obj->dev->dev, "Invalid madv state: %u vs %u\n",
-			msm_obj->madv, madv);
+	if (msm_obj->madv > madv) {
+		DRM_DEV_DEBUG_DRIVER(obj->dev->dev, "Invalid madv state: %u vs %u\n",
+				     msm_obj->madv, madv);
 		return ERR_PTR(-EBUSY);
 	}
 
@@ -1058,6 +1058,7 @@ static void msm_gem_free_object(struct drm_gem_object *obj)
 
 	drm_gem_object_release(obj);
 
+	kfree(msm_obj->metadata);
 	kfree(msm_obj);
 }
 
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 8ddef5443140..8d414b072c29 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -9,6 +9,7 @@
 
 #include <linux/kref.h>
 #include <linux/dma-resv.h>
+#include "drm/drm_exec.h"
 #include "drm/gpu_scheduler.h"
 #include "msm_drv.h"
 
@@ -108,6 +109,10 @@ struct msm_gem_object {
 
 	char name[32]; /* Identifier to print for the debugfs files */
 
+	/* userspace metadata backchannel */
+	void *metadata;
+	u32 metadata_size;
+
 	/**
 	 * pin_count: Number of times the pages are pinned
 	 *
@@ -254,7 +259,7 @@ struct msm_gem_submit {
 	struct msm_gpu *gpu;
 	struct msm_gem_address_space *aspace;
 	struct list_head node;   /* node in ring submit list */
-	struct ww_acquire_ctx ticket;
+	struct drm_exec exec;
 	uint32_t seqno;		/* Sequence number of the submit on the ring */
 
 	/* Hw fence, which is created when the scheduler executes the job, and
@@ -270,9 +275,9 @@ struct msm_gem_submit {
 	int fence_id;       /* key into queue->fence_idr */
 	struct msm_gpu_submitqueue *queue;
 	struct pid *pid;    /* submitting process */
-	bool fault_dumped;  /* Limit devcoredump dumping to one per submit */
-	bool valid;         /* true if no cmdstream patching needed */
-	bool in_rb;         /* "sudo" mode, copy cmds into RB */
+	bool bos_pinned : 1;
+	bool fault_dumped:1;/* Limit devcoredump dumping to one per submit */
+	bool in_rb : 1;     /* "sudo" mode, copy cmds into RB */
 	struct msm_ringbuffer *ring;
 	unsigned int nr_cmds;
 	unsigned int nr_bos;
@@ -287,10 +292,6 @@ struct msm_gem_submit {
 		struct drm_msm_gem_submit_reloc *relocs;
 	} *cmd;  /* array of size nr_cmds */
 	struct {
-/* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
-#define BO_VALID	0x8000	/* is current addr in cmdstream correct/valid? */
-#define BO_LOCKED	0x4000	/* obj lock is held */
-#define BO_PINNED	0x2000	/* obj (pages) is pinned and on active list */
 		uint32_t flags;
 		union {
 			struct drm_gem_object *obj;
diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c
index 5a7d48c02c4b..07ca4ddfe4e3 100644
--- a/drivers/gpu/drm/msm/msm_gem_shrinker.c
+++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c
@@ -75,7 +75,7 @@ static bool
 wait_for_idle(struct drm_gem_object *obj)
 {
 	enum dma_resv_usage usage = dma_resv_usage_rw(true);
-	return dma_resv_wait_timeout(obj->resv, usage, false, 1000) > 0;
+	return dma_resv_wait_timeout(obj->resv, usage, false, 10) > 0;
 }
 
 static bool
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index c002cabe7b9c..fba78193127d 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -17,6 +17,12 @@
 #include "msm_gem.h"
 #include "msm_gpu_trace.h"
 
+/* For userspace errors, use DRM_UT_DRIVER.. so that userspace can enable
+ * error msgs for debugging, but we don't spam dmesg by default
+ */
+#define SUBMIT_ERROR(submit, fmt, ...) \
+	DRM_DEV_DEBUG_DRIVER((submit)->dev->dev, fmt, ##__VA_ARGS__)
+
 /*
  * Cmdstream submission:
  */
@@ -37,7 +43,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 	if (sz > SIZE_MAX)
 		return ERR_PTR(-ENOMEM);
 
-	submit = kzalloc(sz, GFP_KERNEL);
+	submit = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN);
 	if (!submit)
 		return ERR_PTR(-ENOMEM);
 
@@ -136,7 +142,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 
 		if ((submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) ||
 			!(submit_bo.flags & MANDATORY_FLAGS)) {
-			DRM_ERROR("invalid flags: %x\n", submit_bo.flags);
+			SUBMIT_ERROR(submit, "invalid flags: %x\n", submit_bo.flags);
 			ret = -EINVAL;
 			i = 0;
 			goto out;
@@ -144,8 +150,6 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 
 		submit->bos[i].handle = submit_bo.handle;
 		submit->bos[i].flags = submit_bo.flags;
-		/* in validate_objects() we figure out if this is true: */
-		submit->bos[i].iova  = submit_bo.presumed;
 	}
 
 	spin_lock(&file->table_lock);
@@ -158,7 +162,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 		 */
 		obj = idr_find(&file->object_idr, submit->bos[i].handle);
 		if (!obj) {
-			DRM_ERROR("invalid handle %u at index %u\n", submit->bos[i].handle, i);
+			SUBMIT_ERROR(submit, "invalid handle %u at index %u\n", submit->bos[i].handle, i);
 			ret = -EINVAL;
 			goto out_unlock;
 		}
@@ -202,13 +206,13 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit,
 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 			break;
 		default:
-			DRM_ERROR("invalid type: %08x\n", submit_cmd.type);
+			SUBMIT_ERROR(submit, "invalid type: %08x\n", submit_cmd.type);
 			return -EINVAL;
 		}
 
 		if (submit_cmd.size % 4) {
-			DRM_ERROR("non-aligned cmdstream buffer size: %u\n",
-					submit_cmd.size);
+			SUBMIT_ERROR(submit, "non-aligned cmdstream buffer size: %u\n",
+				     submit_cmd.size);
 			ret = -EINVAL;
 			goto out;
 		}
@@ -228,7 +232,7 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit,
 			ret = -ENOMEM;
 			goto out;
 		}
-		submit->cmd[i].relocs = kmalloc(sz, GFP_KERNEL);
+		submit->cmd[i].relocs = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN);
 		if (!submit->cmd[i].relocs) {
 			ret = -ENOMEM;
 			goto out;
@@ -244,101 +248,30 @@ out:
 	return ret;
 }
 
-/* Unwind bo state, according to cleanup_flags.  In the success case, only
- * the lock is dropped at the end of the submit (and active/pin ref is dropped
- * later when the submit is retired).
- */
-static void submit_cleanup_bo(struct msm_gem_submit *submit, int i,
-		unsigned cleanup_flags)
-{
-	struct drm_gem_object *obj = submit->bos[i].obj;
-	unsigned flags = submit->bos[i].flags & cleanup_flags;
-
-	/*
-	 * Clear flags bit before dropping lock, so that the msm_job_run()
-	 * path isn't racing with submit_cleanup() (ie. the read/modify/
-	 * write is protected by the obj lock in all paths)
-	 */
-	submit->bos[i].flags &= ~cleanup_flags;
-
-	if (flags & BO_PINNED)
-		msm_gem_unpin_locked(obj);
-
-	if (flags & BO_LOCKED)
-		dma_resv_unlock(obj->resv);
-}
-
-static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
-{
-	unsigned cleanup_flags = BO_PINNED | BO_LOCKED;
-	submit_cleanup_bo(submit, i, cleanup_flags);
-
-	if (!(submit->bos[i].flags & BO_VALID))
-		submit->bos[i].iova = 0;
-}
-
 /* This is where we make sure all the bo's are reserved and pin'd: */
 static int submit_lock_objects(struct msm_gem_submit *submit)
 {
-	int contended, slow_locked = -1, i, ret = 0;
-
-retry:
-	for (i = 0; i < submit->nr_bos; i++) {
-		struct drm_gem_object *obj = submit->bos[i].obj;
-
-		if (slow_locked == i)
-			slow_locked = -1;
+	int ret;
 
-		contended = i;
+	drm_exec_init(&submit->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, submit->nr_bos);
 
-		if (!(submit->bos[i].flags & BO_LOCKED)) {
-			ret = dma_resv_lock_interruptible(obj->resv,
-							  &submit->ticket);
+	drm_exec_until_all_locked (&submit->exec) {
+		for (unsigned i = 0; i < submit->nr_bos; i++) {
+			struct drm_gem_object *obj = submit->bos[i].obj;
+			ret = drm_exec_prepare_obj(&submit->exec, obj, 1);
+			drm_exec_retry_on_contention(&submit->exec);
 			if (ret)
-				goto fail;
-			submit->bos[i].flags |= BO_LOCKED;
+				goto error;
 		}
 	}
 
-	ww_acquire_done(&submit->ticket);
-
 	return 0;
 
-fail:
-	if (ret == -EALREADY) {
-		DRM_ERROR("handle %u at index %u already on submit list\n",
-				submit->bos[i].handle, i);
-		ret = -EINVAL;
-	}
-
-	for (; i >= 0; i--)
-		submit_unlock_unpin_bo(submit, i);
-
-	if (slow_locked > 0)
-		submit_unlock_unpin_bo(submit, slow_locked);
-
-	if (ret == -EDEADLK) {
-		struct drm_gem_object *obj = submit->bos[contended].obj;
-		/* we lost out in a seqno race, lock and retry.. */
-		ret = dma_resv_lock_slow_interruptible(obj->resv,
-						       &submit->ticket);
-		if (!ret) {
-			submit->bos[contended].flags |= BO_LOCKED;
-			slow_locked = contended;
-			goto retry;
-		}
-
-		/* Not expecting -EALREADY here, if the bo was already
-		 * locked, we should have gotten -EALREADY already from
-		 * the dma_resv_lock_interruptable() call.
-		 */
-		WARN_ON_ONCE(ret == -EALREADY);
-	}
-
+error:
 	return ret;
 }
 
-static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
+static int submit_fence_sync(struct msm_gem_submit *submit)
 {
 	int i, ret = 0;
 
@@ -346,22 +279,6 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
 		struct drm_gem_object *obj = submit->bos[i].obj;
 		bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE;
 
-		/* NOTE: _reserve_shared() must happen before
-		 * _add_shared_fence(), which makes this a slightly
-		 * strange place to call it.  OTOH this is a
-		 * convenient can-fail point to hook it in.
-		 */
-		ret = dma_resv_reserve_fences(obj->resv, 1);
-		if (ret)
-			return ret;
-
-		/* If userspace has determined that explicit fencing is
-		 * used, it can disable implicit sync on the entire
-		 * submit:
-		 */
-		if (no_implicit)
-			continue;
-
 		/* Otherwise userspace can ask for implicit sync to be
 		 * disabled on specific buffers.  This is useful for internal
 		 * usermode driver managed buffers, suballocation, etc.
@@ -384,8 +301,6 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
 	struct msm_drm_private *priv = submit->dev->dev_private;
 	int i, ret = 0;
 
-	submit->valid = true;
-
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct drm_gem_object *obj = submit->bos[i].obj;
 		struct msm_gem_vma *vma;
@@ -401,14 +316,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
 		if (ret)
 			break;
 
-		if (vma->iova == submit->bos[i].iova) {
-			submit->bos[i].flags |= BO_VALID;
-		} else {
-			submit->bos[i].iova = vma->iova;
-			/* iova changed, so address in cmdstream is not valid: */
-			submit->bos[i].flags &= ~BO_VALID;
-			submit->valid = false;
-		}
+		submit->bos[i].iova = vma->iova;
 	}
 
 	/*
@@ -421,13 +329,28 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
 	mutex_lock(&priv->lru.lock);
 	for (i = 0; i < submit->nr_bos; i++) {
 		msm_gem_pin_obj_locked(submit->bos[i].obj);
-		submit->bos[i].flags |= BO_PINNED;
 	}
 	mutex_unlock(&priv->lru.lock);
 
+	submit->bos_pinned = true;
+
 	return ret;
 }
 
+static void submit_unpin_objects(struct msm_gem_submit *submit)
+{
+	if (!submit->bos_pinned)
+		return;
+
+	for (int i = 0; i < submit->nr_bos; i++) {
+		struct drm_gem_object *obj = submit->bos[i].obj;
+
+		msm_gem_unpin_locked(obj);
+	}
+
+	submit->bos_pinned = false;
+}
+
 static void submit_attach_object_fences(struct msm_gem_submit *submit)
 {
 	int i;
@@ -445,11 +368,11 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit)
 }
 
 static int submit_bo(struct msm_gem_submit *submit, uint32_t idx,
-		struct drm_gem_object **obj, uint64_t *iova, bool *valid)
+		struct drm_gem_object **obj, uint64_t *iova)
 {
 	if (idx >= submit->nr_bos) {
-		DRM_ERROR("invalid buffer index: %u (out of %u)\n",
-				idx, submit->nr_bos);
+		SUBMIT_ERROR(submit, "invalid buffer index: %u (out of %u)\n",
+			     idx, submit->nr_bos);
 		return -EINVAL;
 	}
 
@@ -457,8 +380,6 @@ static int submit_bo(struct msm_gem_submit *submit, uint32_t idx,
 		*obj = submit->bos[idx].obj;
 	if (iova)
 		*iova = submit->bos[idx].iova;
-	if (valid)
-		*valid = !!(submit->bos[idx].flags & BO_VALID);
 
 	return 0;
 }
@@ -471,11 +392,8 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
 	uint32_t *ptr;
 	int ret = 0;
 
-	if (!nr_relocs)
-		return 0;
-
 	if (offset % 4) {
-		DRM_ERROR("non-aligned cmdstream buffer: %u\n", offset);
+		SUBMIT_ERROR(submit, "non-aligned cmdstream buffer: %u\n", offset);
 		return -EINVAL;
 	}
 
@@ -494,11 +412,10 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
 		struct drm_msm_gem_submit_reloc submit_reloc = relocs[i];
 		uint32_t off;
 		uint64_t iova;
-		bool valid;
 
 		if (submit_reloc.submit_offset % 4) {
-			DRM_ERROR("non-aligned reloc offset: %u\n",
-					submit_reloc.submit_offset);
+			SUBMIT_ERROR(submit, "non-aligned reloc offset: %u\n",
+				     submit_reloc.submit_offset);
 			ret = -EINVAL;
 			goto out;
 		}
@@ -508,18 +425,15 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob
 
 		if ((off >= (obj->size / 4)) ||
 				(off < last_offset)) {
-			DRM_ERROR("invalid offset %u at reloc %u\n", off, i);
+			SUBMIT_ERROR(submit, "invalid offset %u at reloc %u\n", off, i);
 			ret = -EINVAL;
 			goto out;
 		}
 
-		ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova, &valid);
+		ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova);
 		if (ret)
 			goto out;
 
-		if (valid)
-			continue;
-
 		iova += submit_reloc.reloc_offset;
 
 		if (submit_reloc.shift < 0)
@@ -544,18 +458,14 @@ out:
  */
 static void submit_cleanup(struct msm_gem_submit *submit, bool error)
 {
-	unsigned cleanup_flags = BO_LOCKED;
-	unsigned i;
-
-	if (error)
-		cleanup_flags |= BO_PINNED;
-
-	for (i = 0; i < submit->nr_bos; i++) {
-		struct drm_gem_object *obj = submit->bos[i].obj;
-		submit_cleanup_bo(submit, i, cleanup_flags);
-		if (error)
-			drm_gem_object_put(obj);
+	if (error) {
+		submit_unpin_objects(submit);
+		/* job wasn't enqueued to scheduler, so early retirement: */
+		msm_submit_retire(submit);
 	}
+
+	if (submit->exec.objects)
+		drm_exec_fini(&submit->exec);
 }
 
 void msm_submit_retire(struct msm_gem_submit *submit)
@@ -749,7 +659,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	struct msm_submit_post_dep *post_deps = NULL;
 	struct drm_syncobj **syncobjs_to_reset = NULL;
 	int out_fence_fd = -1;
-	bool has_ww_ticket = false;
 	unsigned i;
 	int ret;
 
@@ -855,15 +764,15 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		goto out;
 
 	/* copy_*_user while holding a ww ticket upsets lockdep */
-	ww_acquire_init(&submit->ticket, &reservation_ww_class);
-	has_ww_ticket = true;
 	ret = submit_lock_objects(submit);
 	if (ret)
 		goto out;
 
-	ret = submit_fence_sync(submit, !!(args->flags & MSM_SUBMIT_NO_IMPLICIT));
-	if (ret)
-		goto out;
+	if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) {
+		ret = submit_fence_sync(submit);
+		if (ret)
+			goto out;
+	}
 
 	ret = submit_pin_objects(submit);
 	if (ret)
@@ -873,32 +782,27 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		struct drm_gem_object *obj;
 		uint64_t iova;
 
-		ret = submit_bo(submit, submit->cmd[i].idx,
-				&obj, &iova, NULL);
+		ret = submit_bo(submit, submit->cmd[i].idx, &obj, &iova);
 		if (ret)
 			goto out;
 
 		if (!submit->cmd[i].size ||
 			((submit->cmd[i].size + submit->cmd[i].offset) >
 				obj->size / 4)) {
-			DRM_ERROR("invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
+			SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
 			ret = -EINVAL;
 			goto out;
 		}
 
 		submit->cmd[i].iova = iova + (submit->cmd[i].offset * 4);
 
-		if (submit->valid)
+		if (likely(!submit->cmd[i].nr_relocs))
 			continue;
 
 		if (!gpu->allow_relocs) {
-			if (submit->cmd[i].nr_relocs) {
-				DRM_ERROR("relocs not allowed\n");
-				ret = -EINVAL;
-				goto out;
-			}
-
-			continue;
+			SUBMIT_ERROR(submit, "relocs not allowed\n");
+			ret = -EINVAL;
+			goto out;
 		}
 
 		ret = submit_reloc(submit, obj, submit->cmd[i].offset * 4,
@@ -974,6 +878,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		}
 	}
 
+	if (ret)
+		goto out;
+
 	submit_attach_object_fences(submit);
 
 	/* The scheduler owns a ref now: */
@@ -993,8 +900,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 
 out:
 	submit_cleanup(submit, !!ret);
-	if (has_ww_ticket)
-		ww_acquire_fini(&submit->ticket);
 out_unlock:
 	mutex_unlock(&queue->lock);
 out_post_unlock:
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 7f64c6667300..095390774f22 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -292,8 +292,7 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
 	/* Set the active crash state to be dumped on failure */
 	gpu->crashstate = state;
 
-	/* FIXME: Release the crashstate if this errors out? */
-	dev_coredumpm(gpu->dev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL,
+	dev_coredumpm(&gpu->pdev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL,
 		msm_gpu_devcoredump_read, msm_gpu_devcoredump_free);
 }
 #else
@@ -366,29 +365,31 @@ static void recover_worker(struct kthread_work *work)
 	DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);
 
 	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
-	if (submit) {
-		/* Increment the fault counts */
-		submit->queue->faults++;
-		if (submit->aspace)
-			submit->aspace->faults++;
 
-		get_comm_cmdline(submit, &comm, &cmd);
+	/*
+	 * If the submit retired while we were waiting for the worker to run,
+	 * or waiting to acquire the gpu lock, then nothing more to do.
+	 */
+	if (!submit)
+		goto out_unlock;
 
-		if (comm && cmd) {
-			DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
-				gpu->name, comm, cmd);
+	/* Increment the fault counts */
+	submit->queue->faults++;
+	if (submit->aspace)
+		submit->aspace->faults++;
 
-			msm_rd_dump_submit(priv->hangrd, submit,
-				"offending task: %s (%s)", comm, cmd);
-		} else {
-			msm_rd_dump_submit(priv->hangrd, submit, NULL);
-		}
+	get_comm_cmdline(submit, &comm, &cmd);
+
+	if (comm && cmd) {
+		DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
+			      gpu->name, comm, cmd);
+
+		msm_rd_dump_submit(priv->hangrd, submit,
+				   "offending task: %s (%s)", comm, cmd);
 	} else {
-		/*
-		 * We couldn't attribute this fault to any particular context,
-		 * so increment the global fault count instead.
-		 */
-		gpu->global_faults++;
+		DRM_DEV_ERROR(dev->dev, "%s: offending task: unknown\n", gpu->name);
+
+		msm_rd_dump_submit(priv->hangrd, submit, NULL);
 	}
 
 	/* Record the crash state */
@@ -441,6 +442,7 @@ static void recover_worker(struct kthread_work *work)
 
 	pm_runtime_put(&gpu->pdev->dev);
 
+out_unlock:
 	mutex_unlock(&gpu->lock);
 
 	msm_gpu_retire(gpu);
diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index 6865db1e3ce8..455b2e3a0cdd 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -28,6 +28,8 @@
 
 #define MIN_IB_BW	400000000UL /* Min ib vote 400MB */
 
+#define DEFAULT_REG_BW	153600 /* Used in mdss fbdev driver */
+
 struct msm_mdss {
 	struct device *dev;
 
@@ -40,8 +42,9 @@ struct msm_mdss {
 		struct irq_domain *domain;
 	} irq_controller;
 	const struct msm_mdss_data *mdss_data;
-	struct icc_path *path[2];
-	u32 num_paths;
+	struct icc_path *mdp_path[2];
+	u32 num_mdp_paths;
+	struct icc_path *reg_bus_path;
 };
 
 static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
@@ -49,38 +52,26 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev,
 {
 	struct icc_path *path0;
 	struct icc_path *path1;
+	struct icc_path *reg_bus_path;
 
-	path0 = of_icc_get(dev, "mdp0-mem");
+	path0 = devm_of_icc_get(dev, "mdp0-mem");
 	if (IS_ERR_OR_NULL(path0))
 		return PTR_ERR_OR_ZERO(path0);
 
-	msm_mdss->path[0] = path0;
-	msm_mdss->num_paths = 1;
+	msm_mdss->mdp_path[0] = path0;
+	msm_mdss->num_mdp_paths = 1;
 
-	path1 = of_icc_get(dev, "mdp1-mem");
+	path1 = devm_of_icc_get(dev, "mdp1-mem");
 	if (!IS_ERR_OR_NULL(path1)) {
-		msm_mdss->path[1] = path1;
-		msm_mdss->num_paths++;
+		msm_mdss->mdp_path[1] = path1;
+		msm_mdss->num_mdp_paths++;
 	}
 
-	return 0;
-}
-
-static void msm_mdss_put_icc_path(void *data)
-{
-	struct msm_mdss *msm_mdss = data;
-	int i;
-
-	for (i = 0; i < msm_mdss->num_paths; i++)
-		icc_put(msm_mdss->path[i]);
-}
-
-static void msm_mdss_icc_request_bw(struct msm_mdss *msm_mdss, unsigned long bw)
-{
-	int i;
+	reg_bus_path = of_icc_get(dev, "cpu-cfg");
+	if (!IS_ERR_OR_NULL(reg_bus_path))
+		msm_mdss->reg_bus_path = reg_bus_path;
 
-	for (i = 0; i < msm_mdss->num_paths; i++)
-		icc_set_bw(msm_mdss->path[i], 0, Bps_to_icc(bw));
+	return 0;
 }
 
 static void msm_mdss_irq(struct irq_desc *desc)
@@ -236,14 +227,22 @@ const struct msm_mdss_data *msm_mdss_get_mdss_data(struct device *dev)
 
 static int msm_mdss_enable(struct msm_mdss *msm_mdss)
 {
-	int ret;
+	int ret, i;
 
 	/*
 	 * Several components have AXI clocks that can only be turned on if
 	 * the interconnect is enabled (non-zero bandwidth). Let's make sure
 	 * that the interconnects are at least at a minimum amount.
 	 */
-	msm_mdss_icc_request_bw(msm_mdss, MIN_IB_BW);
+	for (i = 0; i < msm_mdss->num_mdp_paths; i++)
+		icc_set_bw(msm_mdss->mdp_path[i], 0, Bps_to_icc(MIN_IB_BW));
+
+	if (msm_mdss->mdss_data && msm_mdss->mdss_data->reg_bus_bw)
+		icc_set_bw(msm_mdss->reg_bus_path, 0,
+			   msm_mdss->mdss_data->reg_bus_bw);
+	else
+		icc_set_bw(msm_mdss->reg_bus_path, 0,
+			   DEFAULT_REG_BW);
 
 	ret = clk_bulk_prepare_enable(msm_mdss->num_clocks, msm_mdss->clocks);
 	if (ret) {
@@ -295,8 +294,15 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss)
 
 static int msm_mdss_disable(struct msm_mdss *msm_mdss)
 {
+	int i;
+
 	clk_bulk_disable_unprepare(msm_mdss->num_clocks, msm_mdss->clocks);
-	msm_mdss_icc_request_bw(msm_mdss, 0);
+
+	for (i = 0; i < msm_mdss->num_mdp_paths; i++)
+		icc_set_bw(msm_mdss->mdp_path[i], 0, 0);
+
+	if (msm_mdss->reg_bus_path)
+		icc_set_bw(msm_mdss->reg_bus_path, 0, 0);
 
 	return 0;
 }
@@ -384,6 +390,8 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5
 	if (!msm_mdss)
 		return ERR_PTR(-ENOMEM);
 
+	msm_mdss->mdss_data = of_device_get_match_data(&pdev->dev);
+
 	msm_mdss->mmio = devm_platform_ioremap_resource_byname(pdev, is_mdp5 ? "mdss_phys" : "mdss");
 	if (IS_ERR(msm_mdss->mmio))
 		return ERR_CAST(msm_mdss->mmio);
@@ -393,9 +401,6 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5
 	ret = msm_mdss_parse_data_bus_icc_path(&pdev->dev, msm_mdss);
 	if (ret)
 		return ERR_PTR(ret);
-	ret = devm_add_action_or_reset(&pdev->dev, msm_mdss_put_icc_path, msm_mdss);
-	if (ret)
-		return ERR_PTR(ret);
 
 	if (is_mdp5)
 		ret = mdp5_mdss_parse_clock(pdev, &msm_mdss->clocks);
@@ -477,8 +482,6 @@ static int mdss_probe(struct platform_device *pdev)
 	if (IS_ERR(mdss))
 		return PTR_ERR(mdss);
 
-	mdss->mdss_data = of_device_get_match_data(&pdev->dev);
-
 	platform_set_drvdata(pdev, mdss);
 
 	/*
@@ -510,11 +513,13 @@ static const struct msm_mdss_data msm8998_data = {
 	.ubwc_enc_version = UBWC_1_0,
 	.ubwc_dec_version = UBWC_1_0,
 	.highest_bank_bit = 2,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data qcm2290_data = {
 	/* no UBWC */
 	.highest_bank_bit = 0x2,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sc7180_data = {
@@ -522,6 +527,7 @@ static const struct msm_mdss_data sc7180_data = {
 	.ubwc_dec_version = UBWC_2_0,
 	.ubwc_static = 0x1e,
 	.highest_bank_bit = 0x3,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sc7280_data = {
@@ -531,6 +537,7 @@ static const struct msm_mdss_data sc7280_data = {
 	.ubwc_static = 1,
 	.highest_bank_bit = 1,
 	.macrotile_mode = 1,
+	.reg_bus_bw = 74000,
 };
 
 static const struct msm_mdss_data sc8180x_data = {
@@ -538,6 +545,7 @@ static const struct msm_mdss_data sc8180x_data = {
 	.ubwc_dec_version = UBWC_3_0,
 	.highest_bank_bit = 3,
 	.macrotile_mode = 1,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sc8280xp_data = {
@@ -545,14 +553,22 @@ static const struct msm_mdss_data sc8280xp_data = {
 	.ubwc_dec_version = UBWC_4_0,
 	.ubwc_swizzle = 6,
 	.ubwc_static = 1,
-	.highest_bank_bit = 2,
+	.highest_bank_bit = 3,
 	.macrotile_mode = 1,
+	.reg_bus_bw = 76800,
+};
+
+static const struct msm_mdss_data sdm670_data = {
+	.ubwc_enc_version = UBWC_2_0,
+	.ubwc_dec_version = UBWC_2_0,
+	.highest_bank_bit = 1,
 };
 
 static const struct msm_mdss_data sdm845_data = {
 	.ubwc_enc_version = UBWC_2_0,
 	.ubwc_dec_version = UBWC_2_0,
 	.highest_bank_bit = 2,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sm6350_data = {
@@ -561,12 +577,14 @@ static const struct msm_mdss_data sm6350_data = {
 	.ubwc_swizzle = 6,
 	.ubwc_static = 0x1e,
 	.highest_bank_bit = 1,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sm8150_data = {
 	.ubwc_enc_version = UBWC_3_0,
 	.ubwc_dec_version = UBWC_3_0,
 	.highest_bank_bit = 2,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sm6115_data = {
@@ -575,6 +593,7 @@ static const struct msm_mdss_data sm6115_data = {
 	.ubwc_swizzle = 7,
 	.ubwc_static = 0x11f,
 	.highest_bank_bit = 0x1,
+	.reg_bus_bw = 76800,
 };
 
 static const struct msm_mdss_data sm6125_data = {
@@ -592,6 +611,18 @@ static const struct msm_mdss_data sm8250_data = {
 	/* TODO: highest_bank_bit = 2 for LP_DDR4 */
 	.highest_bank_bit = 3,
 	.macrotile_mode = 1,
+	.reg_bus_bw = 76800,
+};
+
+static const struct msm_mdss_data sm8350_data = {
+	.ubwc_enc_version = UBWC_4_0,
+	.ubwc_dec_version = UBWC_4_0,
+	.ubwc_swizzle = 6,
+	.ubwc_static = 1,
+	/* TODO: highest_bank_bit = 2 for LP_DDR4 */
+	.highest_bank_bit = 3,
+	.macrotile_mode = 1,
+	.reg_bus_bw = 74000,
 };
 
 static const struct msm_mdss_data sm8550_data = {
@@ -602,11 +633,13 @@ static const struct msm_mdss_data sm8550_data = {
 	/* TODO: highest_bank_bit = 2 for LP_DDR4 */
 	.highest_bank_bit = 3,
 	.macrotile_mode = 1,
+	.reg_bus_bw = 57000,
 };
 static const struct of_device_id mdss_dt_match[] = {
 	{ .compatible = "qcom,mdss" },
 	{ .compatible = "qcom,msm8998-mdss", .data = &msm8998_data },
 	{ .compatible = "qcom,qcm2290-mdss", .data = &qcm2290_data },
+	{ .compatible = "qcom,sdm670-mdss", .data = &sdm670_data },
 	{ .compatible = "qcom,sdm845-mdss", .data = &sdm845_data },
 	{ .compatible = "qcom,sc7180-mdss", .data = &sc7180_data },
 	{ .compatible = "qcom,sc7280-mdss", .data = &sc7280_data },
@@ -618,9 +651,10 @@ static const struct of_device_id mdss_dt_match[] = {
 	{ .compatible = "qcom,sm6375-mdss", .data = &sm6350_data },
 	{ .compatible = "qcom,sm8150-mdss", .data = &sm8150_data },
 	{ .compatible = "qcom,sm8250-mdss", .data = &sm8250_data },
-	{ .compatible = "qcom,sm8350-mdss", .data = &sm8250_data },
-	{ .compatible = "qcom,sm8450-mdss", .data = &sm8250_data },
+	{ .compatible = "qcom,sm8350-mdss", .data = &sm8350_data },
+	{ .compatible = "qcom,sm8450-mdss", .data = &sm8350_data },
 	{ .compatible = "qcom,sm8550-mdss", .data = &sm8550_data },
+	{ .compatible = "qcom,sm8650-mdss", .data = &sm8550_data},
 	{}
 };
 MODULE_DEVICE_TABLE(of, mdss_dt_match);
diff --git a/drivers/gpu/drm/msm/msm_mdss.h b/drivers/gpu/drm/msm/msm_mdss.h
index 02bbab42adbc..3afef4b1786d 100644
--- a/drivers/gpu/drm/msm/msm_mdss.h
+++ b/drivers/gpu/drm/msm/msm_mdss.h
@@ -14,6 +14,7 @@ struct msm_mdss_data {
 	u32 ubwc_static;
 	u32 highest_bank_bit;
 	u32 macrotile_mode;
+	u32 reg_bus_bw;
 };
 
 #define UBWC_1_0 0x10000000
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 5adc51f7ab59..ca44fd291c5b 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -270,6 +270,9 @@ int msm_rd_debugfs_init(struct drm_minor *minor)
 	struct msm_rd_state *rd;
 	int ret;
 
+	if (!priv->gpu_pdev)
+		return 0;
+
 	/* only create on first minor: */
 	if (priv->rd)
 		return 0;
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 4968568e3b54..4bc13f7d005a 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -29,9 +29,10 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
 		struct drm_gem_object *obj = submit->bos[i].obj;
 
 		msm_gem_unpin_active(obj);
-		submit->bos[i].flags &= ~BO_PINNED;
 	}
 
+	submit->bos_pinned = false;
+
 	mutex_unlock(&priv->lru.lock);
 
 	msm_gpu_submit(gpu, submit);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 11fe75b68e95..8d37a694b772 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -2476,7 +2476,7 @@ nv50_disp_atomic_commit(struct drm_device *dev,
 
 err_cleanup:
 	if (ret)
-		drm_atomic_helper_cleanup_planes(dev, state);
+		drm_atomic_helper_unprepare_planes(dev, state);
 done:
 	pm_runtime_put_autosuspend(dev->dev);
 	return ret;
diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h
index 5a2f273d95c8..0e32e71e123f 100644
--- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h
+++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h
@@ -26,6 +26,49 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+/**
+ * msgqTxHeader -- TX queue data structure
+ * @version: the version of this structure, must be 0
+ * @size: the size of the entire queue, including this header
+ * @msgSize: the padded size of queue element, 16 is minimum
+ * @msgCount: the number of elements in this queue
+ * @writePtr: head index of this queue
+ * @flags: 1 = swap the RX pointers
+ * @rxHdrOff: offset of readPtr in this structure
+ * @entryOff: offset of beginning of queue (msgqRxHeader), relative to
+ *          beginning of this structure
+ *
+ * The command queue is a queue of RPCs that are sent from the driver to the
+ * GSP.  The status queue is a queue of messages/responses from GSP-RM to the
+ * driver.  Although the driver allocates memory for both queues, the command
+ * queue is owned by the driver and the status queue is owned by GSP-RM.  In
+ * addition, the headers of the two queues must not share the same 4K page.
+ *
+ * Each queue is prefixed with this data structure.  The idea is that a queue
+ * and its header are written to only by their owner.  That is, only the
+ * driver writes to the command queue and command queue header, and only the
+ * GSP writes to the status (receive) queue and its header.
+ *
+ * This is enforced by the concept of "swapping" the RX pointers.  This is
+ * why the 'flags' field must be set to 1.  'rxHdrOff' is how the GSP knows
+ * where the where the tail pointer of its status queue.
+ *
+ * When the driver writes a new RPC to the command queue, it updates writePtr.
+ * When it reads a new message from the status queue, it updates readPtr.  In
+ * this way, the GSP knows when a new command is in the queue (it polls
+ * writePtr) and it knows how much free space is in the status queue (it
+ * checks readPtr).  The driver never cares about how much free space is in
+ * the status queue.
+ *
+ * As usual, producers write to the head pointer, and consumers read from the
+ * tail pointer.  When head == tail, the queue is empty.
+ *
+ * So to summarize:
+ * command.writePtr = head of command queue
+ * command.readPtr = tail of status queue
+ * status.writePtr = head of status queue
+ * status.readPtr = tail of command queue
+ */
 typedef struct
 {
     NvU32 version;   // queue version
@@ -38,6 +81,14 @@ typedef struct
     NvU32 entryOff;  // Offset of entries from start of backing store.
 } msgqTxHeader;
 
+/**
+ * msgqRxHeader - RX queue data structure
+ * @readPtr: tail index of the other queue
+ *
+ * Although this is a separate struct, it could easily be merged into
+ * msgqTxHeader.  msgqTxHeader.rxHdrOff is simply the offset of readPtr
+ * from the beginning of msgqTxHeader.
+ */
 typedef struct
 {
     NvU32 readPtr; // message id of last message read
diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h
index 754c6af42f30..10121218f4d3 100644
--- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h
+++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h
@@ -38,7 +38,7 @@ typedef struct PACKED_REGISTRY_TABLE
 {
     NvU32                   size;
     NvU32                   numEntries;
-    PACKED_REGISTRY_ENTRY   entries[0];
+    PACKED_REGISTRY_ENTRY   entries[] __counted_by(numEntries);
 } PACKED_REGISTRY_TABLE;
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index b7dda486a7ea..00cc7d1abaa3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -325,8 +325,9 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
 			    (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
 				continue;
 
-			if (pi < 0)
-				pi = i;
+			/* pick the last one as it will be smallest. */
+			pi = i;
+
 			/* Stop once the buffer is larger than the current page size. */
 			if (*size >= 1ULL << vmm->page[i].shift)
 				break;
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index dae3baf707a0..4f223c972c6a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1347,7 +1347,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job,
 		}
 	}
 
-	drm_exec_init(exec, vme->flags);
+	drm_exec_init(exec, vme->flags, 0);
 	drm_exec_until_all_locked(exec) {
 		ret = bind_lock_validate(job, exec, vme->num_fences);
 		drm_exec_retry_on_contention(exec);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index dc44f5c7833f..44fb86841c05 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -365,10 +365,8 @@ r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *argv, bool wait, u32 repc)
 	}
 
 	ret = r535_gsp_cmdq_push(gsp, rpc);
-	if (ret) {
-		mutex_unlock(&gsp->cmdq.mutex);
+	if (ret)
 		return ERR_PTR(ret);
-	}
 
 	if (wait) {
 		msg = r535_gsp_msg_recv(gsp, fn, repc);
@@ -1048,7 +1046,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
 	char *strings;
 	int str_offset;
 	int i;
-	size_t rpc_size = sizeof(*rpc) + sizeof(rpc->entries[0]) * NV_GSP_REG_NUM_ENTRIES;
+	size_t rpc_size = struct_size(rpc, entries, NV_GSP_REG_NUM_ENTRIES);
 
 	/* add strings + null terminator */
 	for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++)
@@ -1379,6 +1377,13 @@ r535_gsp_msg_post_event(void *priv, u32 fn, void *repv, u32 repc)
 	return 0;
 }
 
+/**
+ * r535_gsp_msg_run_cpu_sequencer() -- process I/O commands from the GSP
+ *
+ * The GSP sequencer is a list of I/O commands that the GSP can send to
+ * the driver to perform for various purposes.  The most common usage is to
+ * perform a special mid-initialization reset.
+ */
 static int
 r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc)
 {
@@ -1718,6 +1723,23 @@ r535_gsp_libos_id8(const char *name)
 	return id;
 }
 
+/**
+ * create_pte_array() - creates a PTE array of a physically contiguous buffer
+ * @ptes: pointer to the array
+ * @addr: base address of physically contiguous buffer (GSP_PAGE_SIZE aligned)
+ * @size: size of the buffer
+ *
+ * GSP-RM sometimes expects physically-contiguous buffers to have an array of
+ * "PTEs" for each page in that buffer.  Although in theory that allows for
+ * the buffer to be physically discontiguous, GSP-RM does not currently
+ * support that.
+ *
+ * In this case, the PTEs are DMA addresses of each page of the buffer.  Since
+ * the buffer is physically contiguous, calculating all the PTEs is simple
+ * math.
+ *
+ * See memdescGetPhysAddrsForGpu()
+ */
 static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size)
 {
 	unsigned int num_pages = DIV_ROUND_UP_ULL(size, GSP_PAGE_SIZE);
@@ -1727,6 +1749,35 @@ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size)
 		ptes[i] = (u64)addr + (i << GSP_PAGE_SHIFT);
 }
 
+/**
+ * r535_gsp_libos_init() -- create the libos arguments structure
+ *
+ * The logging buffers are byte queues that contain encoded printf-like
+ * messages from GSP-RM.  They need to be decoded by a special application
+ * that can parse the buffers.
+ *
+ * The 'loginit' buffer contains logs from early GSP-RM init and
+ * exception dumps.  The 'logrm' buffer contains the subsequent logs. Both are
+ * written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE.
+ *
+ * The physical address map for the log buffer is stored in the buffer
+ * itself, starting with offset 1. Offset 0 contains the "put" pointer.
+ *
+ * The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is
+ * configured for a larger page size (e.g. 64K pages), we need to give
+ * the GSP an array of 4K pages. Fortunately, since the buffer is
+ * physically contiguous, it's simple math to calculate the addresses.
+ *
+ * The buffers must be a multiple of GSP_PAGE_SIZE.  GSP-RM also currently
+ * ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the
+ * buffers to be physically contiguous anyway.
+ *
+ * The memory allocated for the arguments must remain until the GSP sends the
+ * init_done RPC.
+ *
+ * See _kgspInitLibosLoggingStructures (allocates memory for buffers)
+ * See kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array)
+ */
 static int
 r535_gsp_libos_init(struct nvkm_gsp *gsp)
 {
@@ -1837,6 +1888,35 @@ nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3)
 		nvkm_gsp_mem_dtor(gsp, &rx3->mem[i]);
 }
 
+/**
+ * nvkm_gsp_radix3_sg - build a radix3 table from a S/G list
+ *
+ * The GSP uses a three-level page table, called radix3, to map the firmware.
+ * Each 64-bit "pointer" in the table is either the bus address of an entry in
+ * the next table (for levels 0 and 1) or the bus address of the next page in
+ * the GSP firmware image itself.
+ *
+ * Level 0 contains a single entry in one page that points to the first page
+ * of level 1.
+ *
+ * Level 1, since it's also only one page in size, contains up to 512 entries,
+ * one for each page in Level 2.
+ *
+ * Level 2 can be up to 512 pages in size, and each of those entries points to
+ * the next page of the firmware image.  Since there can be up to 512*512
+ * pages, that limits the size of the firmware to 512*512*GSP_PAGE_SIZE = 1GB.
+ *
+ * Internally, the GSP has its window into system memory, but the base
+ * physical address of the aperture is not 0.  In fact, it varies depending on
+ * the GPU architecture.  Since the GPU is a PCI device, this window is
+ * accessed via DMA and is therefore bound by IOMMU translation.  The end
+ * result is that GSP-RM must translate the bus addresses in the table to GSP
+ * physical addresses.  All this should happen transparently.
+ *
+ * Returns 0 on success, or negative error code
+ *
+ * See kgspCreateRadix3_IMPL
+ */
 static int
 nvkm_gsp_radix3_sg(struct nvkm_device *device, struct sg_table *sgt, u64 size,
 		   struct nvkm_gsp_radix3 *rx3)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c
index e34bc6076401..8379e72d77ab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c
@@ -31,7 +31,7 @@ tu102_vmm_flush(struct nvkm_vmm *vmm, int depth)
 
 	type |= 0x00000001; /* PAGE_ALL */
 	if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR]))
-		type |= 0x00000004; /* HUB_ONLY */
+		type |= 0x00000006; /* HUB_ONLY | ALL PDB (hack) */
 
 	mutex_lock(&vmm->mmu->mutex);
 
diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
index be8f48e3c1db..c4c0f08e9202 100644
--- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
+++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
@@ -1764,6 +1764,7 @@ static const struct panel_desc starry_qfh032011_53g_desc = {
 	.mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
 		      MIPI_DSI_MODE_LPM,
 	.init_cmds = starry_qfh032011_53g_init_cmd,
+	.lp11_before_reset = true,
 };
 
 static const struct drm_display_mode starry_himax83102_j02_default_mode = {
diff --git a/drivers/gpu/drm/panel/panel-novatek-nt36523.c b/drivers/gpu/drm/panel/panel-novatek-nt36523.c
index 9b9a7eb1bc60..a189ce236328 100644
--- a/drivers/gpu/drm/panel/panel-novatek-nt36523.c
+++ b/drivers/gpu/drm/panel/panel-novatek-nt36523.c
@@ -1254,9 +1254,9 @@ static int nt36523_probe(struct mipi_dsi_device *dsi)
 			return dev_err_probe(dev, -EPROBE_DEFER, "cannot get secondary DSI host\n");
 
 		pinfo->dsi[1] = mipi_dsi_device_register_full(dsi1_host, info);
-		if (!pinfo->dsi[1]) {
+		if (IS_ERR(pinfo->dsi[1])) {
 			dev_err(dev, "cannot get secondary DSI device\n");
-			return -ENODEV;
+			return PTR_ERR(pinfo->dsi[1]);
 		}
 	}
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
index f59c82ea8870..2d30da38c2c3 100644
--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
@@ -29,14 +29,20 @@ static void panfrost_devfreq_update_utilization(struct panfrost_devfreq *pfdevfr
 static int panfrost_devfreq_target(struct device *dev, unsigned long *freq,
 				   u32 flags)
 {
+	struct panfrost_device *ptdev = dev_get_drvdata(dev);
 	struct dev_pm_opp *opp;
+	int err;
 
 	opp = devfreq_recommended_opp(dev, freq, flags);
 	if (IS_ERR(opp))
 		return PTR_ERR(opp);
 	dev_pm_opp_put(opp);
 
-	return dev_pm_opp_set_rate(dev, *freq);
+	err =  dev_pm_opp_set_rate(dev, *freq);
+	if (!err)
+		ptdev->pfdevfreq.current_frequency = *freq;
+
+	return err;
 }
 
 static void panfrost_devfreq_reset(struct panfrost_devfreq *pfdevfreq)
@@ -58,7 +64,6 @@ static int panfrost_devfreq_get_dev_status(struct device *dev,
 	spin_lock_irqsave(&pfdevfreq->lock, irqflags);
 
 	panfrost_devfreq_update_utilization(pfdevfreq);
-	pfdevfreq->current_frequency = status->current_frequency;
 
 	status->total_time = ktime_to_ns(ktime_add(pfdevfreq->busy_time,
 						   pfdevfreq->idle_time));
@@ -165,6 +170,14 @@ int panfrost_devfreq_init(struct panfrost_device *pfdev)
 	panfrost_devfreq_profile.initial_freq = cur_freq;
 
 	/*
+	 * We could wait until panfrost_devfreq_target() to set this value, but
+	 * since the simple_ondemand governor works asynchronously, there's a
+	 * chance by the time someone opens the device's fdinfo file, current
+	 * frequency hasn't been updated yet, so let's just do an early set.
+	 */
+	pfdevfreq->current_frequency = cur_freq;
+
+	/*
 	 * Set the recommend OPP this will enable and configure the regulator
 	 * if any and will avoid a switch off by regulator_late_cleanup()
 	 */
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index 0cf64456e29a..d47b40b82b0b 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -200,7 +200,7 @@ static enum drm_gem_object_status panfrost_gem_status(struct drm_gem_object *obj
 	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
 	enum drm_gem_object_status res = 0;
 
-	if (bo->base.pages)
+	if (bo->base.base.import_attach || bo->base.pages)
 		res |= DRM_GEM_OBJECT_RESIDENT;
 
 	if (bo->base.madv == PANFROST_MADV_DONTNEED)
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 4aca09cab4b8..6e537c5bd295 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -29,6 +29,7 @@
 #include <linux/pci.h>
 
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_edid.h>
 #include <drm/drm_file.h>
 #include <drm/drm_modeset_helper_vtables.h>
 #include <drm/radeon_drm.h>
diff --git a/drivers/gpu/drm/radeon/clearstate_evergreen.h b/drivers/gpu/drm/radeon/clearstate_evergreen.h
index 63a1ffbb3ced..3b645558f133 100644
--- a/drivers/gpu/drm/radeon/clearstate_evergreen.h
+++ b/drivers/gpu/drm/radeon/clearstate_evergreen.h
@@ -1049,7 +1049,7 @@ static const struct cs_extent_def SECT_CONTEXT_defs[] =
     {SECT_CONTEXT_def_5, 0x0000a29e, 5 },
     {SECT_CONTEXT_def_6, 0x0000a2a5, 56 },
     {SECT_CONTEXT_def_7, 0x0000a2de, 290 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const u32 SECT_CLEAR_def_1[] =
 {
@@ -1060,7 +1060,7 @@ static const u32 SECT_CLEAR_def_1[] =
 static const struct cs_extent_def SECT_CLEAR_defs[] =
 {
     {SECT_CLEAR_def_1, 0x0000ffc0, 3 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const u32 SECT_CTRLCONST_def_1[] =
 {
@@ -1070,11 +1070,11 @@ static const u32 SECT_CTRLCONST_def_1[] =
 static const struct cs_extent_def SECT_CTRLCONST_defs[] =
 {
     {SECT_CTRLCONST_def_1, 0x0000f3fc, 2 },
-    { 0, 0, 0 }
+    { NULL, 0, 0 }
 };
 static const struct cs_section_def evergreen_cs_data[] = {
     { SECT_CONTEXT_defs, SECT_CONTEXT },
     { SECT_CLEAR_defs, SECT_CLEAR },
     { SECT_CTRLCONST_defs, SECT_CTRLCONST },
-    { 0, SECT_NONE }
+    { NULL, SECT_NONE }
 };
diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c
index e8fe239b9d79..324e9b765098 100644
--- a/drivers/gpu/drm/radeon/dce3_1_afmt.c
+++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c
@@ -21,6 +21,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 #include <linux/hdmi.h>
+#include <drm/drm_edid.h>
 
 #include "radeon.h"
 #include "radeon_asic.h"
diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index 4a1d5447eac1..4c06f47453fd 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -21,6 +21,7 @@
  *
  */
 #include <linux/hdmi.h>
+#include <drm/drm_edid.h>
 
 #include "dce6_afmt.h"
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index f0ae087be914..a424b86008b8 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -26,6 +26,7 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 
+#include <drm/drm_edid.h>
 #include <drm/drm_vblank.h>
 #include <drm/radeon_drm.h>
 #include <drm/drm_fourcc.h>
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 5f3078f8ab95..681119c91d94 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -26,6 +26,7 @@
  */
 #include <linux/hdmi.h>
 
+#include <drm/drm_edid.h>
 #include <drm/radeon_drm.h>
 #include "evergreen_hdmi.h"
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index affa9e0309b2..cfeca2694d5f 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2321,7 +2321,7 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
 	switch (prim_walk) {
 	case 1:
 		for (i = 0; i < track->num_arrays; i++) {
-			size = track->arrays[i].esize * track->max_indx * 4;
+			size = track->arrays[i].esize * track->max_indx * 4UL;
 			if (track->arrays[i].robj == NULL) {
 				DRM_ERROR("(PW %u) Vertex array %u no buffer "
 					  "bound\n", prim_walk, i);
@@ -2340,7 +2340,7 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
 		break;
 	case 2:
 		for (i = 0; i < track->num_arrays; i++) {
-			size = track->arrays[i].esize * (nverts - 1) * 4;
+			size = track->arrays[i].esize * (nverts - 1) * 4UL;
 			if (track->arrays[i].robj == NULL) {
 				DRM_ERROR("(PW %u) Vertex array %u no buffer "
 					  "bound\n", prim_walk, i);
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 638f861af80f..6cf54a747749 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -1275,7 +1275,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			return -EINVAL;
 		}
 		tmp = (reg - CB_COLOR0_BASE) / 4;
-		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
+		track->cb_color_bo_offset[tmp] = (u64)radeon_get_ib_value(p, idx) << 8;
 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->cb_color_base_last[tmp] = ib[idx];
 		track->cb_color_bo[tmp] = reloc->robj;
@@ -1302,7 +1302,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 					"0x%04X\n", reg);
 			return -EINVAL;
 		}
-		track->htile_offset = radeon_get_ib_value(p, idx) << 8;
+		track->htile_offset = (u64)radeon_get_ib_value(p, idx) << 8;
 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->htile_bo = reloc->robj;
 		track->db_dirty = true;
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 85c4bb186203..3596ea4a8b60 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -27,6 +27,7 @@
 #include <linux/pci.h>
 
 #include <drm/drm_device.h>
+#include <drm/drm_edid.h>
 #include <drm/radeon_drm.h>
 
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index 279bf130a18c..91b58fbc2be7 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -27,6 +27,7 @@
 
 #include <drm/drm_crtc.h>
 #include <drm/drm_eld.h>
+#include <drm/drm_edid.h>
 #include "dce6_afmt.h"
 #include "evergreen_hdmi.h"
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h
index 05e67867469b..dacaaa007051 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.h
+++ b/drivers/gpu/drm/radeon/radeon_audio.h
@@ -27,7 +27,9 @@
 
 #include <linux/types.h>
 
-#define RREG32_ENDPOINT(block, reg)		\
+struct cea_sad;
+
+#define RREG32_ENDPOINT(block, reg)				\
 	radeon_audio_endpoint_rreg(rdev, (block), (reg))
 #define WREG32_ENDPOINT(block, reg, v)	\
 	radeon_audio_endpoint_wreg(rdev, (block), (reg), (v))
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 2620efc7c675..6952b1273b0f 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -28,6 +28,7 @@
 #include <linux/pci.h>
 
 #include <drm/drm_device.h>
+#include <drm/drm_edid.h>
 #include <drm/radeon_drm.h>
 
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 901e75ec70ff..efd18c8d84c8 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -687,11 +687,16 @@ static void radeon_crtc_init(struct drm_device *dev, int index)
 	if (radeon_crtc == NULL)
 		return;
 
+	radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0);
+	if (!radeon_crtc->flip_queue) {
+		kfree(radeon_crtc);
+		return;
+	}
+
 	drm_crtc_init(dev, &radeon_crtc->base, &radeon_crtc_funcs);
 
 	drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
 	radeon_crtc->crtc_id = index;
-	radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0);
 	rdev->mode_info.crtcs[index] = radeon_crtc;
 
 	if (rdev->family >= CHIP_BONAIRE) {
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index 9cb6401fe97e..3de3dce9e89d 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -26,6 +26,7 @@
 
 #include <linux/pci.h>
 
+#include <drm/drm_edid.h>
 #include <drm/drm_device.h>
 #include <drm/radeon_drm.h>
 
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 1decdcec0264..59c4db13d90a 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -32,13 +32,13 @@
 
 #include <drm/display/drm_dp_helper.h>
 #include <drm/drm_crtc.h>
-#include <drm/drm_edid.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_fixed.h>
 #include <drm/drm_modeset_helper_vtables.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
 
+struct edid;
 struct radeon_bo;
 struct radeon_device;
 
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index e6534fa9f1fb..38048593bb4a 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -413,6 +413,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 			dev_err(rdev->dev, "(%d) ring map failed\n", r);
 			return r;
 		}
+		radeon_debugfs_ring_init(rdev, ring);
 	}
 	ring->ptr_mask = (ring->ring_size / 4) - 1;
 	ring->ring_free_dw = ring->ring_size / 4;
@@ -421,7 +422,6 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 		ring->next_rptr_gpu_addr = rdev->wb.gpu_addr + index;
 		ring->next_rptr_cpu_addr = &rdev->wb.wb[index/4];
 	}
-	radeon_debugfs_ring_init(rdev, ring);
 	radeon_ring_lockup_update(rdev, ring);
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 987cabbf1318..c38b4d5d6a14 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -1204,13 +1204,17 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 	r = radeon_bo_create(rdev, pd_size, align, true,
 			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
 			     NULL, &vm->page_directory);
-	if (r)
+	if (r) {
+		kfree(vm->page_tables);
+		vm->page_tables = NULL;
 		return r;
-
+	}
 	r = radeon_vm_clear_bo(rdev, vm->page_directory);
 	if (r) {
 		radeon_bo_unref(&vm->page_directory);
 		vm->page_directory = NULL;
+		kfree(vm->page_tables);
+		vm->page_tables = NULL;
 		return r;
 	}
 
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index a91012447b56..85e9cba49cec 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -3611,6 +3611,10 @@ static int si_cp_start(struct radeon_device *rdev)
 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
 		ring = &rdev->ring[i];
 		r = radeon_ring_lock(rdev, ring, 2);
+		if (r) {
+			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+			return r;
+		}
 
 		/* clear the compute context state */
 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c
index f74f381af05f..d49c145db437 100644
--- a/drivers/gpu/drm/radeon/sumo_dpm.c
+++ b/drivers/gpu/drm/radeon/sumo_dpm.c
@@ -1493,8 +1493,10 @@ static int sumo_parse_power_table(struct radeon_device *rdev)
 		non_clock_array_index = power_state->v2.nonClockInfoIndex;
 		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
 			&non_clock_info_array->nonClockInfo[non_clock_array_index];
-		if (!rdev->pm.power_state[i].clock_info)
+		if (!rdev->pm.power_state[i].clock_info) {
+			kfree(rdev->pm.dpm.ps);
 			return -EINVAL;
+		}
 		ps = kzalloc(sizeof(struct sumo_ps), GFP_KERNEL);
 		if (ps == NULL) {
 			kfree(rdev->pm.dpm.ps);
diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c
index 08ea1c864cb2..ef1cc7bad20a 100644
--- a/drivers/gpu/drm/radeon/trinity_dpm.c
+++ b/drivers/gpu/drm/radeon/trinity_dpm.c
@@ -1726,8 +1726,10 @@ static int trinity_parse_power_table(struct radeon_device *rdev)
 		non_clock_array_index = power_state->v2.nonClockInfoIndex;
 		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
 			&non_clock_info_array->nonClockInfo[non_clock_array_index];
-		if (!rdev->pm.power_state[i].clock_info)
+		if (!rdev->pm.power_state[i].clock_info) {
+			kfree(rdev->pm.dpm.ps);
 			return -EINVAL;
+		}
 		ps = kzalloc(sizeof(struct sumo_ps), GFP_KERNEL);
 		if (ps == NULL) {
 			kfree(rdev->pm.dpm.ps);
diff --git a/drivers/gpu/drm/tests/drm_exec_test.c b/drivers/gpu/drm/tests/drm_exec_test.c
index 563949d777dd..81f928a429ba 100644
--- a/drivers/gpu/drm/tests/drm_exec_test.c
+++ b/drivers/gpu/drm/tests/drm_exec_test.c
@@ -46,7 +46,7 @@ static void sanitycheck(struct kunit *test)
 {
 	struct drm_exec exec;
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_fini(&exec);
 	KUNIT_SUCCEED(test);
 }
@@ -60,7 +60,7 @@ static void test_lock(struct kunit *test)
 
 	drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE);
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec) {
 		ret = drm_exec_lock_obj(&exec, &gobj);
 		drm_exec_retry_on_contention(&exec);
@@ -80,7 +80,7 @@ static void test_lock_unlock(struct kunit *test)
 
 	drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE);
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec) {
 		ret = drm_exec_lock_obj(&exec, &gobj);
 		drm_exec_retry_on_contention(&exec);
@@ -107,7 +107,7 @@ static void test_duplicates(struct kunit *test)
 
 	drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE);
 
-	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES);
+	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
 	drm_exec_until_all_locked(&exec) {
 		ret = drm_exec_lock_obj(&exec, &gobj);
 		drm_exec_retry_on_contention(&exec);
@@ -134,7 +134,7 @@ static void test_prepare(struct kunit *test)
 
 	drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE);
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec) {
 		ret = drm_exec_prepare_obj(&exec, &gobj, 1);
 		drm_exec_retry_on_contention(&exec);
@@ -159,7 +159,7 @@ static void test_prepare_array(struct kunit *test)
 	drm_gem_private_object_init(priv->drm, &gobj1, PAGE_SIZE);
 	drm_gem_private_object_init(priv->drm, &gobj2, PAGE_SIZE);
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec)
 		ret = drm_exec_prepare_array(&exec, array, ARRAY_SIZE(array),
 					     1);
@@ -174,14 +174,14 @@ static void test_multiple_loops(struct kunit *test)
 {
 	struct drm_exec exec;
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec)
 	{
 		break;
 	}
 	drm_exec_fini(&exec);
 
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 	drm_exec_until_all_locked(&exec)
 	{
 		break;
diff --git a/drivers/gpu/drm/xe/.gitignore b/drivers/gpu/drm/xe/.gitignore
new file mode 100644
index 000000000000..8778bf132674
--- /dev/null
+++ b/drivers/gpu/drm/xe/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+*.hdrtest
+/generated
+/xe_gen_wa_oob
diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig
new file mode 100644
index 000000000000..9590eac91af3
--- /dev/null
+++ b/drivers/gpu/drm/xe/.kunitconfig
@@ -0,0 +1,13 @@
+# xe dependencies
+CONFIG_KUNIT=y
+CONFIG_PCI=y
+CONFIG_PCI_IOV=y
+CONFIG_DEBUG_FS=y
+CONFIG_DRM=y
+CONFIG_DRM_FBDEV_EMULATION=y
+CONFIG_DRM_KMS_HELPER=y
+CONFIG_DRM_XE=y
+CONFIG_DRM_XE_DISPLAY=n
+CONFIG_EXPERT=y
+CONFIG_FB=y
+CONFIG_DRM_XE_KUNIT_TEST=y
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
new file mode 100644
index 000000000000..1cced50d8d8c
--- /dev/null
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config DRM_XE
+	tristate "Intel Xe Graphics"
+	depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) && 64BIT
+	select INTERVAL_TREE
+	# we need shmfs for the swappable backing store, and in particular
+	# the shmem_readpage() which depends upon tmpfs
+	select SHMEM
+	select TMPFS
+	select DRM_BUDDY
+	select DRM_EXEC
+	select DRM_KMS_HELPER
+	select DRM_PANEL
+	select DRM_SUBALLOC_HELPER
+	select DRM_DISPLAY_DP_HELPER
+	select DRM_DISPLAY_HDCP_HELPER
+	select DRM_DISPLAY_HDMI_HELPER
+	select DRM_DISPLAY_HELPER
+	select DRM_MIPI_DSI
+	select RELAY
+	select IRQ_WORK
+	# xe depends on ACPI_VIDEO when ACPI is enabled
+	# but for select to work, need to select ACPI_VIDEO's dependencies, ick
+	select BACKLIGHT_CLASS_DEVICE if ACPI
+	select INPUT if ACPI
+	select ACPI_VIDEO if X86 && ACPI
+	select ACPI_BUTTON if ACPI
+	select ACPI_WMI if X86 && ACPI
+	select SYNC_FILE
+	select IOSF_MBI
+	select CRC32
+	select SND_HDA_I915 if SND_HDA_CORE
+	select CEC_CORE if CEC_NOTIFIER
+	select VMAP_PFN
+	select DRM_TTM
+	select DRM_TTM_HELPER
+	select DRM_EXEC
+	select DRM_GPUVM
+	select DRM_SCHED
+	select MMU_NOTIFIER
+	select WANT_DEV_COREDUMP
+	select AUXILIARY_BUS
+	help
+	  Experimental driver for Intel Xe series GPUs
+
+	  If "M" is selected, the module will be called xe.
+
+config DRM_XE_DISPLAY
+	bool "Enable display support"
+	depends on DRM_XE && EXPERT && DRM_XE=m
+	select FB_IOMEM_HELPERS
+	select I2C
+	select I2C_ALGOBIT
+	default y
+	help
+	  Disable this option only if you want to compile out display support.
+
+config DRM_XE_FORCE_PROBE
+	string "Force probe xe for selected Intel hardware IDs"
+	depends on DRM_XE
+	help
+	  This is the default value for the xe.force_probe module
+	  parameter. Using the module parameter overrides this option.
+
+	  Force probe the xe for Intel graphics devices that are
+	  recognized but not properly supported by this kernel version. It is
+	  recommended to upgrade to a kernel version with proper support as soon
+	  as it is available.
+
+	  It can also be used to block the probe of recognized and fully
+	  supported devices.
+
+	  Use "" to disable force probe. If in doubt, use this.
+
+	  Use "<pci-id>[,<pci-id>,...]" to force probe the xe for listed
+	  devices. For example, "4500" or "4500,4571".
+
+	  Use "*" to force probe the driver for all known devices.
+
+	  Use "!" right before the ID to block the probe of the device. For
+	  example, "4500,!4571" forces the probe of 4500 and blocks the probe of
+	  4571.
+
+	  Use "!*" to block the probe of the driver for all known devices.
+
+menu "drm/Xe Debugging"
+depends on DRM_XE
+depends on EXPERT
+source "drivers/gpu/drm/xe/Kconfig.debug"
+endmenu
+
+menu "drm/xe Profile Guided Optimisation"
+	visible if EXPERT
+	depends on DRM_XE
+	source "drivers/gpu/drm/xe/Kconfig.profile"
+endmenu
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
new file mode 100644
index 000000000000..549065f57a78
--- /dev/null
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config DRM_XE_WERROR
+	bool "Force GCC to throw an error instead of a warning when compiling"
+	# As this may inadvertently break the build, only allow the user
+	# to shoot oneself in the foot iff they aim really hard
+	depends on EXPERT
+	# We use the dependency on !COMPILE_TEST to not be enabled in
+	# allmodconfig or allyesconfig configurations
+	depends on !COMPILE_TEST
+	default n
+	help
+	  Add -Werror to the build flags for (and only for) xe.ko.
+	  Do not enable this unless you are writing code for the xe.ko module.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG
+	bool "Enable additional driver debugging"
+	depends on DRM_XE
+	depends on EXPERT
+	depends on !COMPILE_TEST
+	default n
+	help
+	  Choose this option to turn on extra driver debugging that may affect
+	  performance but will catch some internal issues.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG_VM
+	bool "Enable extra VM debugging info"
+	default n
+	help
+	  Enable extra VM debugging info
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG_SRIOV
+	bool "Enable extra SR-IOV debugging"
+	default n
+	help
+	  Enable extra SR-IOV debugging info.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG_MEM
+	bool "Enable passing SYS/VRAM addresses to user space"
+	default n
+	help
+	  Pass object location trough uapi. Intended for extended
+	  testing and development only.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_SIMPLE_ERROR_CAPTURE
+	bool "Enable simple error capture to dmesg on job timeout"
+	default n
+	help
+	  Choose this option when debugging an unexpected job timeout
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_KUNIT_TEST
+        tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS
+	depends on DRM_XE && KUNIT && DEBUG_FS
+	default KUNIT_ALL_TESTS
+	select DRM_EXPORT_FOR_TESTS if m
+	select DRM_KUNIT_TEST_HELPERS
+	help
+	  Choose this option to allow the driver to perform selftests under
+	  the kunit framework
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_LARGE_GUC_BUFFER
+        bool "Enable larger guc log buffer"
+        default n
+        help
+          Choose this option when debugging guc issues.
+          Buffer should be large enough for complex issues.
+
+          Recommended for driver developers only.
+
+          If in doubt, say "N".
+
+config DRM_XE_USERPTR_INVAL_INJECT
+       bool "Inject userptr invalidation -EINVAL errors"
+       default n
+       help
+         Choose this option when debugging error paths that
+	 are hit during checks for userptr invalidations.
+
+	 Recomended for driver developers only.
+	 If in doubt, say "N".
diff --git a/drivers/gpu/drm/xe/Kconfig.profile b/drivers/gpu/drm/xe/Kconfig.profile
new file mode 100644
index 000000000000..ba17a25e8db3
--- /dev/null
+++ b/drivers/gpu/drm/xe/Kconfig.profile
@@ -0,0 +1,54 @@
+config DRM_XE_JOB_TIMEOUT_MAX
+	int "Default max job timeout (ms)"
+	default 10000 # milliseconds
+	help
+	  Configures the default max job timeout after which job will
+	  be forcefully taken away from scheduler.
+config DRM_XE_JOB_TIMEOUT_MIN
+	int "Default min job timeout (ms)"
+	default 1 # milliseconds
+	help
+	  Configures the default min job timeout after which job will
+	  be forcefully taken away from scheduler.
+config DRM_XE_TIMESLICE_MAX
+	int "Default max timeslice duration (us)"
+	default 10000000 # microseconds
+	help
+	  Configures the default max timeslice duration between multiple
+	  contexts by guc scheduling.
+config DRM_XE_TIMESLICE_MIN
+	int "Default min timeslice duration (us)"
+	default 1 # microseconds
+	help
+	  Configures the default min timeslice duration between multiple
+	  contexts by guc scheduling.
+config DRM_XE_PREEMPT_TIMEOUT
+	int "Preempt timeout (us, jiffy granularity)"
+	default 640000 # microseconds
+	help
+	  How long to wait (in microseconds) for a preemption event to occur
+	  when submitting a new context. If the current context does not hit
+	  an arbitration point and yield to HW before the timer expires, the
+	  HW will be reset to allow the more important context to execute.
+config DRM_XE_PREEMPT_TIMEOUT_MAX
+	int "Default max preempt timeout (us)"
+	default 10000000 # microseconds
+	help
+	  Configures the default max preempt timeout after which context
+	  will be forcefully taken away and higher priority context will
+	  run.
+config DRM_XE_PREEMPT_TIMEOUT_MIN
+	int "Default min preempt timeout (us)"
+	default 1 # microseconds
+	help
+	  Configures the default min preempt timeout after which context
+	  will be forcefully taken away and higher priority context will
+	  run.
+config DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT
+	bool "Default configuration of limitation on scheduler timeout"
+	default y
+	help
+	  Configures the enablement of limitation on scheduler timeout
+	  to apply to applicable user. For elevated user, all above MIN
+	  and MAX values will apply when this configuration is enable to
+	  apply limitation. By default limitation is applied.
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
new file mode 100644
index 000000000000..53bd2a8ba1ae
--- /dev/null
+++ b/drivers/gpu/drm/xe/Makefile
@@ -0,0 +1,305 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the drm device driver.  This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+# Unconditionally enable W=1 warnings locally
+# --- begin copy-paste W=1 warnings from scripts/Makefile.extrawarn
+subdir-ccflags-y += -Wextra -Wunused -Wno-unused-parameter
+subdir-ccflags-y += -Wmissing-declarations
+subdir-ccflags-y += $(call cc-option, -Wrestrict)
+subdir-ccflags-y += -Wmissing-format-attribute
+subdir-ccflags-y += -Wmissing-prototypes
+subdir-ccflags-y += -Wold-style-definition
+subdir-ccflags-y += -Wmissing-include-dirs
+subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
+subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
+subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
+subdir-ccflags-y += $(call cc-option, -Wformat-overflow)
+subdir-ccflags-y += $(call cc-option, -Wformat-truncation)
+subdir-ccflags-y += $(call cc-option, -Wstringop-overflow)
+subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
+# The following turn off the warnings enabled by -Wextra
+ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
+subdir-ccflags-y += -Wno-missing-field-initializers
+subdir-ccflags-y += -Wno-type-limits
+subdir-ccflags-y += -Wno-shift-negative-value
+endif
+ifeq ($(findstring 3, $(KBUILD_EXTRA_WARN)),)
+subdir-ccflags-y += -Wno-sign-compare
+endif
+# --- end copy-paste
+
+# Enable -Werror in CI and development
+subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror
+
+subdir-ccflags-y += -I$(obj) -I$(srctree)/$(src)
+
+# generated sources
+hostprogs := xe_gen_wa_oob
+
+generated_oob := $(obj)/generated/xe_wa_oob.c $(obj)/generated/xe_wa_oob.h
+
+quiet_cmd_wa_oob = GEN     $(notdir $(generated_oob))
+      cmd_wa_oob = mkdir -p $(@D); $^ $(generated_oob)
+
+$(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules
+	$(call cmd,wa_oob)
+
+uses_generated_oob := \
+	$(obj)/xe_gsc.o \
+	$(obj)/xe_guc.o \
+	$(obj)/xe_migrate.o \
+	$(obj)/xe_ring_ops.o \
+	$(obj)/xe_vm.o \
+	$(obj)/xe_wa.o \
+	$(obj)/xe_ttm_stolen_mgr.o
+
+$(uses_generated_oob): $(generated_oob)
+
+# Please keep these build lists sorted!
+
+# core driver code
+
+xe-y += xe_bb.o \
+	xe_bo.o \
+	xe_bo_evict.o \
+	xe_debugfs.o \
+	xe_devcoredump.o \
+	xe_device.o \
+	xe_device_sysfs.o \
+	xe_dma_buf.o \
+	xe_drm_client.o \
+	xe_exec.o \
+	xe_execlist.o \
+	xe_exec_queue.o \
+	xe_force_wake.o \
+	xe_ggtt.o \
+	xe_gpu_scheduler.o \
+	xe_gsc.o \
+	xe_gsc_submit.o \
+	xe_gt.o \
+	xe_gt_ccs_mode.o \
+	xe_gt_clock.o \
+	xe_gt_debugfs.o \
+	xe_gt_freq.o \
+	xe_gt_idle.o \
+	xe_gt_mcr.o \
+	xe_gt_pagefault.o \
+	xe_gt_sysfs.o \
+	xe_gt_throttle_sysfs.o \
+	xe_gt_tlb_invalidation.o \
+	xe_gt_topology.o \
+	xe_guc.o \
+	xe_guc_ads.o \
+	xe_guc_ct.o \
+	xe_guc_debugfs.o \
+	xe_guc_hwconfig.o \
+	xe_guc_log.o \
+	xe_guc_pc.o \
+	xe_guc_submit.o \
+	xe_heci_gsc.o \
+	xe_hw_engine.o \
+	xe_hw_engine_class_sysfs.o \
+	xe_hw_fence.o \
+	xe_huc.o \
+	xe_huc_debugfs.o \
+	xe_irq.o \
+	xe_lrc.o \
+	xe_migrate.o \
+	xe_mmio.o \
+	xe_mocs.o \
+	xe_module.o \
+	xe_pat.o \
+	xe_pci.o \
+	xe_pcode.o \
+	xe_pm.o \
+	xe_preempt_fence.o \
+	xe_pt.o \
+	xe_pt_walk.o \
+	xe_query.o \
+	xe_range_fence.o \
+	xe_reg_sr.o \
+	xe_reg_whitelist.o \
+	xe_rtp.o \
+	xe_ring_ops.o \
+	xe_sa.o \
+	xe_sched_job.o \
+	xe_step.o \
+	xe_sync.o \
+	xe_tile.o \
+	xe_tile_sysfs.o \
+	xe_trace.o \
+	xe_ttm_sys_mgr.o \
+	xe_ttm_stolen_mgr.o \
+	xe_ttm_vram_mgr.o \
+	xe_tuning.o \
+	xe_uc.o \
+	xe_uc_debugfs.o \
+	xe_uc_fw.o \
+	xe_vm.o \
+	xe_wait_user_fence.o \
+	xe_wa.o \
+	xe_wopcm.o
+
+# graphics hardware monitoring (HWMON) support
+xe-$(CONFIG_HWMON) += xe_hwmon.o
+
+# graphics virtualization (SR-IOV) support
+xe-y += xe_sriov.o
+
+xe-$(CONFIG_PCI_IOV) += \
+	xe_lmtt.o \
+	xe_lmtt_2l.o \
+	xe_lmtt_ml.o
+
+# i915 Display compat #defines and #includes
+subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
+	-I$(srctree)/$(src)/display/ext \
+	-I$(srctree)/$(src)/compat-i915-headers \
+	-I$(srctree)/drivers/gpu/drm/xe/display/ \
+	-I$(srctree)/drivers/gpu/drm/i915/display/ \
+	-Ddrm_i915_gem_object=xe_bo \
+	-Ddrm_i915_private=xe_device
+
+CFLAGS_i915-display/intel_fbdev.o = $(call cc-disable-warning, override-init)
+CFLAGS_i915-display/intel_display_device.o = $(call cc-disable-warning, override-init)
+
+# Rule to build SOC code shared with i915
+$(obj)/i915-soc/%.o: $(srctree)/drivers/gpu/drm/i915/soc/%.c FORCE
+	$(call cmd,force_checksrc)
+	$(call if_changed_rule,cc_o_c)
+
+# Rule to build display code shared with i915
+$(obj)/i915-display/%.o: $(srctree)/drivers/gpu/drm/i915/display/%.c FORCE
+	$(call cmd,force_checksrc)
+	$(call if_changed_rule,cc_o_c)
+
+# Display code specific to xe
+xe-$(CONFIG_DRM_XE_DISPLAY) += \
+	xe_display.o \
+	display/xe_fb_pin.o \
+	display/xe_hdcp_gsc.o \
+	display/xe_plane_initial.o \
+	display/xe_display_rps.o \
+	display/xe_display_misc.o \
+	display/xe_dsb_buffer.o \
+	display/intel_fbdev_fb.o \
+	display/intel_fb_bo.o \
+	display/ext/i915_irq.o \
+	display/ext/i915_utils.o
+
+# SOC code shared with i915
+xe-$(CONFIG_DRM_XE_DISPLAY) += \
+	i915-soc/intel_dram.o \
+	i915-soc/intel_pch.o
+
+# Display code shared with i915
+xe-$(CONFIG_DRM_XE_DISPLAY) += \
+	i915-display/icl_dsi.o \
+	i915-display/intel_atomic.o \
+	i915-display/intel_atomic_plane.o \
+	i915-display/intel_audio.o \
+	i915-display/intel_backlight.o \
+	i915-display/intel_bios.o \
+	i915-display/intel_bw.o \
+	i915-display/intel_cdclk.o \
+	i915-display/intel_color.o \
+	i915-display/intel_combo_phy.o \
+	i915-display/intel_connector.o \
+	i915-display/intel_crtc.o \
+	i915-display/intel_crtc_state_dump.o \
+	i915-display/intel_cursor.o \
+	i915-display/intel_cx0_phy.o \
+	i915-display/intel_ddi.o \
+	i915-display/intel_ddi_buf_trans.o \
+	i915-display/intel_display.o \
+	i915-display/intel_display_debugfs.o \
+	i915-display/intel_display_debugfs_params.o \
+	i915-display/intel_display_device.o \
+	i915-display/intel_display_driver.o \
+	i915-display/intel_display_irq.o \
+	i915-display/intel_display_params.o \
+	i915-display/intel_display_power.o \
+	i915-display/intel_display_power_map.o \
+	i915-display/intel_display_power_well.o \
+	i915-display/intel_display_trace.o \
+	i915-display/intel_display_wa.o \
+	i915-display/intel_dkl_phy.o \
+	i915-display/intel_dmc.o \
+	i915-display/intel_dp.o \
+	i915-display/intel_dp_aux.o \
+	i915-display/intel_dp_aux_backlight.o \
+	i915-display/intel_dp_hdcp.o \
+	i915-display/intel_dp_link_training.o \
+	i915-display/intel_dp_mst.o \
+	i915-display/intel_dpll.o \
+	i915-display/intel_dpll_mgr.o \
+	i915-display/intel_dpt_common.o \
+	i915-display/intel_drrs.o \
+	i915-display/intel_dsb.o \
+	i915-display/intel_dsi.o \
+	i915-display/intel_dsi_dcs_backlight.o \
+	i915-display/intel_dsi_vbt.o \
+	i915-display/intel_fb.o \
+	i915-display/intel_fbc.o \
+	i915-display/intel_fdi.o \
+	i915-display/intel_fifo_underrun.o \
+	i915-display/intel_frontbuffer.o \
+	i915-display/intel_global_state.o \
+	i915-display/intel_gmbus.o \
+	i915-display/intel_hdcp.o \
+	i915-display/intel_hdmi.o \
+	i915-display/intel_hotplug.o \
+	i915-display/intel_hotplug_irq.o \
+	i915-display/intel_hti.o \
+	i915-display/intel_link_bw.o \
+	i915-display/intel_lspcon.o \
+	i915-display/intel_modeset_lock.o \
+	i915-display/intel_modeset_setup.o \
+	i915-display/intel_modeset_verify.o \
+	i915-display/intel_panel.o \
+	i915-display/intel_pipe_crc.o \
+	i915-display/intel_pmdemand.o \
+	i915-display/intel_pps.o \
+	i915-display/intel_psr.o \
+	i915-display/intel_qp_tables.o \
+	i915-display/intel_quirks.o \
+	i915-display/intel_snps_phy.o \
+	i915-display/intel_tc.o \
+	i915-display/intel_vblank.o \
+	i915-display/intel_vdsc.o \
+	i915-display/intel_vga.o \
+	i915-display/intel_vrr.o \
+	i915-display/intel_wm.o \
+	i915-display/skl_scaler.o \
+	i915-display/skl_universal_plane.o \
+	i915-display/skl_watermark.o
+
+ifeq ($(CONFIG_ACPI),y)
+	xe-$(CONFIG_DRM_XE_DISPLAY) += \
+		i915-display/intel_acpi.o \
+		i915-display/intel_opregion.o
+endif
+
+ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y)
+	xe-$(CONFIG_DRM_XE_DISPLAY) += i915-display/intel_fbdev.o
+endif
+
+obj-$(CONFIG_DRM_XE) += xe.o
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
+
+# header test
+hdrtest_find_args := -not -path xe_rtp_helpers.h
+ifneq ($(CONFIG_DRM_XE_DISPLAY),y)
+	hdrtest_find_args += -not -path display/\* -not -path compat-i915-headers/\* -not -path xe_display.h
+endif
+
+always-$(CONFIG_DRM_XE_WERROR) += \
+	$(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h' $(hdrtest_find_args)))
+
+quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
+      cmd_hdrtest = $(CC) -DHDRTEST $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@
+
+$(obj)/%.hdrtest: $(src)/%.h FORCE
+	$(call if_changed_dep,hdrtest)
diff --git a/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h b/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h
new file mode 100644
index 000000000000..a4c2646803b5
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GSC_COMMAND_HEADER_ABI_H
+#define _ABI_GSC_COMMAND_HEADER_ABI_H
+
+#include <linux/types.h>
+
+struct intel_gsc_mtl_header {
+	u32 validity_marker;
+#define GSC_HECI_VALIDITY_MARKER 0xA578875A
+
+	u8 heci_client_id;
+
+	u8 reserved1;
+
+	u16 header_version;
+#define MTL_GSC_HEADER_VERSION 1
+
+	/* FW allows host to decide host_session handle as it sees fit. */
+	u64 host_session_handle;
+
+	/* handle generated by FW for messages that need to be re-submitted */
+	u64 gsc_message_handle;
+
+	u32 message_size; /* lower 20 bits only, upper 12 are reserved */
+
+	/*
+	 * Flags mask:
+	 * Bit 0: Pending
+	 * Bit 1: Session Cleanup;
+	 * Bits 2-15: Flags
+	 * Bits 16-31: Extension Size
+	 * According to internal spec flags are either input or output
+	 * we distinguish the flags using OUTFLAG or INFLAG
+	 */
+	u32 flags;
+#define GSC_OUTFLAG_MSG_PENDING	BIT(0)
+#define GSC_INFLAG_MSG_CLEANUP	BIT(1)
+
+	u32 status;
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h
new file mode 100644
index 000000000000..ad4d041873ab
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GSC_MKHI_COMMANDS_ABI_H
+#define _ABI_GSC_MKHI_COMMANDS_ABI_H
+
+#include <linux/types.h>
+
+/* Heci client ID for MKHI commands */
+#define HECI_MEADDRESS_MKHI 7
+
+/* Generic MKHI header */
+struct gsc_mkhi_header {
+	u8  group_id;
+	u8  command;
+	u8  reserved;
+	u8  result;
+} __packed;
+
+/* GFX_SRV commands */
+#define MKHI_GROUP_ID_GFX_SRV 0x30
+
+#define MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION (0x42)
+
+struct gsc_get_compatibility_version_in {
+	struct gsc_mkhi_header header;
+} __packed;
+
+struct gsc_get_compatibility_version_out {
+	struct gsc_mkhi_header header;
+	u16 proj_major;
+	u16 compat_major;
+	u16 compat_minor;
+	u16 reserved[5];
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
new file mode 100644
index 000000000000..57520809e48d
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GSC_PXP_COMMANDS_ABI_H
+#define _ABI_GSC_PXP_COMMANDS_ABI_H
+
+#include <linux/types.h>
+
+/* Heci client ID for PXP commands */
+#define HECI_MEADDRESS_PXP 17
+
+#define PXP_APIVER(x, y) (((x) & 0xFFFF) << 16 | ((y) & 0xFFFF))
+
+/*
+ * there are a lot of status codes for PXP, but we only define the cross-API
+ * common ones that we actually can handle in the kernel driver. Other failure
+ * codes should be printed to error msg for debug.
+ */
+enum pxp_status {
+	PXP_STATUS_SUCCESS = 0x0,
+	PXP_STATUS_ERROR_API_VERSION = 0x1002,
+	PXP_STATUS_NOT_READY = 0x100e,
+	PXP_STATUS_PLATFCONFIG_KF1_NOVERIF = 0x101a,
+	PXP_STATUS_PLATFCONFIG_KF1_BAD = 0x101f,
+	PXP_STATUS_OP_NOT_PERMITTED = 0x4013
+};
+
+/* Common PXP FW message header */
+struct pxp_cmd_header {
+	u32 api_version;
+	u32 command_id;
+	union {
+		u32 status; /* out */
+		u32 stream_id; /* in */
+#define PXP_CMDHDR_EXTDATA_SESSION_VALID GENMASK(0, 0)
+#define PXP_CMDHDR_EXTDATA_APP_TYPE GENMASK(1, 1)
+#define PXP_CMDHDR_EXTDATA_SESSION_ID GENMASK(17, 2)
+	};
+	/* Length of the message (excluding the header) */
+	u32 buffer_len;
+} __packed;
+
+#define PXP43_CMDID_NEW_HUC_AUTH 0x0000003F /* MTL+ */
+
+/* PXP-Input-Packet: HUC Auth-only */
+struct pxp43_new_huc_auth_in {
+	struct pxp_cmd_header header;
+	u64 huc_base_address;
+	u32 huc_size;
+} __packed;
+
+/* PXP-Output-Packet: HUC Load and Authentication or Auth-only */
+struct pxp43_huc_auth_out {
+	struct pxp_cmd_header header;
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
new file mode 100644
index 000000000000..3062e0e0d467
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_ACTIONS_ABI_H
+#define _ABI_GUC_ACTIONS_ABI_H
+
+/**
+ * DOC: HOST2GUC_SELF_CFG
+ *
+ * This message is used by Host KMD to setup of the `GuC Self Config KLVs`_.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SELF_CFG` = 0x0508            |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 | 31:16 | **KLV_KEY** - KLV key, see `GuC Self Config KLVs`_           |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **KLV_LEN** - KLV length                                     |
+ *  |   |       |                                                              |
+ *  |   |       |   - 32 bit KLV = 1                                           |
+ *  |   |       |   - 64 bit KLV = 2                                           |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **VALUE32** - Bits 31-0 of the KLV value                     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 3 |  31:0 | **VALUE64** - Bits 63-32 of the KLV value (**KLV_LEN** = 2)  |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | DATA0 = **NUM** - 1 if KLV was parsed, 0 if not recognized   |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_HOST2GUC_SELF_CFG			0x0508
+
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY		(0xffff << 16)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN		(0xffff << 0)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32		GUC_HXG_REQUEST_MSG_n_DATAn
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64		GUC_HXG_REQUEST_MSG_n_DATAn
+
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_0_NUM		GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/**
+ * DOC: HOST2GUC_CONTROL_CTB
+ *
+ * This H2G action allows Vf Host to enable or disable H2G and G2H `CT Buffer`_.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_CONTROL_CTB` = 0x4509         |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **CONTROL** - control `CTB based communication`_             |
+ *  |   |       |                                                              |
+ *  |   |       |   - _`GUC_CTB_CONTROL_DISABLE` = 0                           |
+ *  |   |       |   - _`GUC_CTB_CONTROL_ENABLE` = 1                            |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | DATA0 = MBZ                                                  |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_HOST2GUC_CONTROL_CTB			0x4509
+
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL	GUC_HXG_REQUEST_MSG_n_DATAn
+#define   GUC_CTB_CONTROL_DISABLE			0u
+#define   GUC_CTB_CONTROL_ENABLE			1u
+
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_0_MBZ		GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/* legacy definitions */
+
+enum xe_guc_action {
+	XE_GUC_ACTION_DEFAULT = 0x0,
+	XE_GUC_ACTION_REQUEST_PREEMPTION = 0x2,
+	XE_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3,
+	XE_GUC_ACTION_ALLOCATE_DOORBELL = 0x10,
+	XE_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20,
+	XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30,
+	XE_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40,
+	XE_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302,
+	XE_GUC_ACTION_ENTER_S_STATE = 0x501,
+	XE_GUC_ACTION_EXIT_S_STATE = 0x502,
+	XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
+	XE_GUC_ACTION_SCHED_CONTEXT = 0x1000,
+	XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
+	XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
+	XE_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
+	XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
+	XE_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005,
+	XE_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
+	XE_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
+	XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
+	XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
+	XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B,
+	XE_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
+	XE_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+	XE_GUC_ACTION_GET_HWCONFIG = 0x4100,
+	XE_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
+	XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
+	XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
+	XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
+	XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+	XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
+	XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
+	XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
+	XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000,
+	XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002,
+	XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003,
+	XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY = 0x6004,
+	XE_GUC_ACTION_TLB_INVALIDATION = 0x7000,
+	XE_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001,
+	XE_GUC_ACTION_TLB_INVALIDATION_ALL = 0x7002,
+	XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
+	XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
+	XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
+	XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
+	XE_GUC_ACTION_LIMIT
+};
+
+enum xe_guc_rc_options {
+	XE_GUCRC_HOST_CONTROL,
+	XE_GUCRC_FIRMWARE_CONTROL,
+};
+
+enum xe_guc_preempt_options {
+	XE_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4,
+	XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8,
+};
+
+enum xe_guc_report_status {
+	XE_GUC_REPORT_STATUS_UNKNOWN = 0x0,
+	XE_GUC_REPORT_STATUS_ACKED = 0x1,
+	XE_GUC_REPORT_STATUS_ERROR = 0x2,
+	XE_GUC_REPORT_STATUS_COMPLETE = 0x4,
+};
+
+enum xe_guc_sleep_state_status {
+	XE_GUC_SLEEP_STATE_SUCCESS = 0x1,
+	XE_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2,
+	XE_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3
+#define XE_GUC_SLEEP_STATE_INVALID_MASK 0x80000000
+};
+
+#define GUC_LOG_CONTROL_LOGGING_ENABLED	(1 << 0)
+#define GUC_LOG_CONTROL_VERBOSITY_SHIFT	4
+#define GUC_LOG_CONTROL_VERBOSITY_MASK	(0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
+#define GUC_LOG_CONTROL_DEFAULT_LOGGING	(1 << 8)
+
+#define XE_GUC_TLB_INVAL_TYPE_SHIFT 0
+#define XE_GUC_TLB_INVAL_MODE_SHIFT 8
+/* Flush PPC or SMRO caches along with TLB invalidation request */
+#define XE_GUC_TLB_INVAL_FLUSH_CACHE (1 << 31)
+
+enum xe_guc_tlb_invalidation_type {
+	XE_GUC_TLB_INVAL_FULL = 0x0,
+	XE_GUC_TLB_INVAL_PAGE_SELECTIVE = 0x1,
+	XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX = 0x2,
+	XE_GUC_TLB_INVAL_GUC = 0x3,
+};
+
+/*
+ * 0: Heavy mode of Invalidation:
+ * The pipeline of the engine(s) for which the invalidation is targeted to is
+ * blocked, and all the in-flight transactions are guaranteed to be Globally
+ * Observed before completing the TLB invalidation
+ * 1: Lite mode of Invalidation:
+ * TLBs of the targeted engine(s) are immediately invalidated.
+ * In-flight transactions are NOT guaranteed to be Globally Observed before
+ * completing TLB invalidation.
+ * Light Invalidation Mode is to be used only when
+ * it can be guaranteed (by SW) that the address translations remain invariant
+ * for the in-flight transactions across the TLB invalidation. In other words,
+ * this mode can be used when the TLB invalidation is intended to clear out the
+ * stale cached translations that are no longer in use. Light Invalidation Mode
+ * is much faster than the Heavy Invalidation Mode, as it does not wait for the
+ * in-flight transactions to be GOd.
+ */
+enum xe_guc_tlb_inval_mode {
+	XE_GUC_TLB_INVAL_MODE_HEAVY = 0x0,
+	XE_GUC_TLB_INVAL_MODE_LITE = 0x1,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
new file mode 100644
index 000000000000..811add10c30d
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _GUC_ACTIONS_SLPC_ABI_H_
+#define _GUC_ACTIONS_SLPC_ABI_H_
+
+#include <linux/types.h>
+
+/**
+ * DOC: SLPC SHARED DATA STRUCTURE
+ *
+ *  +----+------+--------------------------------------------------------------+
+ *  | CL | Bytes| Description                                                  |
+ *  +====+======+==============================================================+
+ *  | 1  | 0-3  | SHARED DATA SIZE                                             |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 4-7  | GLOBAL STATE                                                 |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 8-11 | DISPLAY DATA ADDRESS                                         |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 12:63| PADDING                                                      |
+ *  +----+------+--------------------------------------------------------------+
+ *  |    | 0:63 | PADDING(PLATFORM INFO)                                       |
+ *  +----+------+--------------------------------------------------------------+
+ *  | 3  | 0-3  | TASK STATE DATA                                              |
+ *  +    +------+--------------------------------------------------------------+
+ *  |    | 4:63 | PADDING                                                      |
+ *  +----+------+--------------------------------------------------------------+
+ *  |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS                               |
+ *  +----+------+--------------------------------------------------------------+
+ *  |    |      | PADDING + EXTRA RESERVED PAGE                                |
+ *  +----+------+--------------------------------------------------------------+
+ */
+
+/*
+ * SLPC exposes certain parameters for global configuration by the host.
+ * These are referred to as override parameters, because in most cases
+ * the host will not need to modify the default values used by SLPC.
+ * SLPC remembers the default values which allows the host to easily restore
+ * them by simply unsetting the override. The host can set or unset override
+ * parameters during SLPC (re-)initialization using the SLPC Reset event.
+ * The host can also set or unset override parameters on the fly using the
+ * Parameter Set and Parameter Unset events
+ */
+
+#define SLPC_MAX_OVERRIDE_PARAMETERS		256
+#define SLPC_OVERRIDE_BITFIELD_SIZE \
+		(SLPC_MAX_OVERRIDE_PARAMETERS / 32)
+
+#define SLPC_PAGE_SIZE_BYTES			4096
+#define SLPC_CACHELINE_SIZE_BYTES		64
+#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE	SLPC_PAGE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_MAX		(2 * SLPC_PAGE_SIZE_BYTES)
+
+/*
+ * Cacheline size aligned (Total size needed for
+ * SLPM_KMD_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes)
+ */
+#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES	(((((SLPC_MAX_OVERRIDE_PARAMETERS * 4) \
+						+ ((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \
+		+ (SLPC_CACHELINE_SIZE_BYTES - 1)) / SLPC_CACHELINE_SIZE_BYTES) * \
+					SLPC_CACHELINE_SIZE_BYTES)
+
+#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER	(SLPC_SHARED_DATA_SIZE_BYTE_MAX - \
+					(SLPC_SHARED_DATA_SIZE_BYTE_HEADER \
+					+ SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO \
+					+ SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE \
+					+ SLPC_OVERRIDE_PARAMS_TOTAL_BYTES \
+					+ SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE))
+
+enum slpc_task_enable {
+	SLPC_PARAM_TASK_DEFAULT = 0,
+	SLPC_PARAM_TASK_ENABLED,
+	SLPC_PARAM_TASK_DISABLED,
+	SLPC_PARAM_TASK_UNKNOWN
+};
+
+enum slpc_global_state {
+	SLPC_GLOBAL_STATE_NOT_RUNNING = 0,
+	SLPC_GLOBAL_STATE_INITIALIZING = 1,
+	SLPC_GLOBAL_STATE_RESETTING = 2,
+	SLPC_GLOBAL_STATE_RUNNING = 3,
+	SLPC_GLOBAL_STATE_SHUTTING_DOWN = 4,
+	SLPC_GLOBAL_STATE_ERROR = 5
+};
+
+enum slpc_param_id {
+	SLPC_PARAM_TASK_ENABLE_GTPERF = 0,
+	SLPC_PARAM_TASK_DISABLE_GTPERF = 1,
+	SLPC_PARAM_TASK_ENABLE_BALANCER = 2,
+	SLPC_PARAM_TASK_DISABLE_BALANCER = 3,
+	SLPC_PARAM_TASK_ENABLE_DCC = 4,
+	SLPC_PARAM_TASK_DISABLE_DCC = 5,
+	SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ = 6,
+	SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ = 7,
+	SLPC_PARAM_GLOBAL_MIN_GT_SLICE_FREQ_MHZ = 8,
+	SLPC_PARAM_GLOBAL_MAX_GT_SLICE_FREQ_MHZ = 9,
+	SLPC_PARAM_GTPERF_THRESHOLD_MAX_FPS = 10,
+	SLPC_PARAM_GLOBAL_DISABLE_GT_FREQ_MANAGEMENT = 11,
+	SLPC_PARAM_GTPERF_ENABLE_FRAMERATE_STALLING = 12,
+	SLPC_PARAM_GLOBAL_DISABLE_RC6_MODE_CHANGE = 13,
+	SLPC_PARAM_GLOBAL_OC_UNSLICE_FREQ_MHZ = 14,
+	SLPC_PARAM_GLOBAL_OC_SLICE_FREQ_MHZ = 15,
+	SLPC_PARAM_GLOBAL_ENABLE_IA_GT_BALANCING = 16,
+	SLPC_PARAM_GLOBAL_ENABLE_ADAPTIVE_BURST_TURBO = 17,
+	SLPC_PARAM_GLOBAL_ENABLE_EVAL_MODE = 18,
+	SLPC_PARAM_GLOBAL_ENABLE_BALANCER_IN_NON_GAMING_MODE = 19,
+	SLPC_PARAM_GLOBAL_RT_MODE_TURBO_FREQ_DELTA_MHZ = 20,
+	SLPC_PARAM_PWRGATE_RC_MODE = 21,
+	SLPC_PARAM_EDR_MODE_COMPUTE_TIMEOUT_MS = 22,
+	SLPC_PARAM_EDR_QOS_FREQ_MHZ = 23,
+	SLPC_PARAM_MEDIA_FF_RATIO_MODE = 24,
+	SLPC_PARAM_ENABLE_IA_FREQ_LIMITING = 25,
+	SLPC_PARAM_STRATEGIES = 26,
+	SLPC_PARAM_POWER_PROFILE = 27,
+	SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY = 28,
+	SLPC_MAX_PARAM = 32,
+};
+
+enum slpc_media_ratio_mode {
+	SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL = 0,
+	SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE = 1,
+	SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2,
+};
+
+enum slpc_gucrc_mode {
+	SLPC_GUCRC_MODE_HW = 0,
+	SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1,
+	SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2,
+	SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3,
+
+	SLPC_GUCRC_MODE_MAX,
+};
+
+enum slpc_event_id {
+	SLPC_EVENT_RESET = 0,
+	SLPC_EVENT_SHUTDOWN = 1,
+	SLPC_EVENT_PLATFORM_INFO_CHANGE = 2,
+	SLPC_EVENT_DISPLAY_MODE_CHANGE = 3,
+	SLPC_EVENT_FLIP_COMPLETE = 4,
+	SLPC_EVENT_QUERY_TASK_STATE = 5,
+	SLPC_EVENT_PARAMETER_SET = 6,
+	SLPC_EVENT_PARAMETER_UNSET = 7,
+};
+
+struct slpc_task_state_data {
+	union {
+		u32 task_status_padding;
+		struct {
+			u32 status;
+#define SLPC_GTPERF_TASK_ENABLED	REG_BIT(0)
+#define SLPC_DCC_TASK_ENABLED		REG_BIT(11)
+#define SLPC_IN_DCC			REG_BIT(12)
+#define SLPC_BALANCER_ENABLED		REG_BIT(15)
+#define SLPC_IBC_TASK_ENABLED		REG_BIT(16)
+#define SLPC_BALANCER_IA_LMT_ENABLED	REG_BIT(17)
+#define SLPC_BALANCER_IA_LMT_ACTIVE	REG_BIT(18)
+		};
+	};
+	union {
+		u32 freq_padding;
+		struct {
+#define SLPC_MAX_UNSLICE_FREQ_MASK	REG_GENMASK(7, 0)
+#define SLPC_MIN_UNSLICE_FREQ_MASK	REG_GENMASK(15, 8)
+#define SLPC_MAX_SLICE_FREQ_MASK	REG_GENMASK(23, 16)
+#define SLPC_MIN_SLICE_FREQ_MASK	REG_GENMASK(31, 24)
+			u32 freq;
+		};
+	};
+} __packed;
+
+struct slpc_shared_data_header {
+	/* Total size in bytes of this shared buffer. */
+	u32 size;
+	u32 global_state;
+	u32 display_data_addr;
+} __packed;
+
+struct slpc_override_params {
+	u32 bits[SLPC_OVERRIDE_BITFIELD_SIZE];
+	u32 values[SLPC_MAX_OVERRIDE_PARAMETERS];
+} __packed;
+
+struct slpc_shared_data {
+	struct slpc_shared_data_header header;
+	u8 shared_data_header_pad[SLPC_SHARED_DATA_SIZE_BYTE_HEADER -
+				sizeof(struct slpc_shared_data_header)];
+
+	u8 platform_info_pad[SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO];
+
+	struct slpc_task_state_data task_state_data;
+	u8 task_state_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE -
+				sizeof(struct slpc_task_state_data)];
+
+	struct slpc_override_params override_params;
+	u8 override_params_pad[SLPC_OVERRIDE_PARAMS_TOTAL_BYTES -
+				sizeof(struct slpc_override_params)];
+
+	u8 shared_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_OTHER];
+
+	/* PAGE 2 (4096 bytes), mode based parameter will be removed soon */
+	u8 reserved_mode_definition[4096];
+} __packed;
+
+/**
+ * DOC: SLPC H2G MESSAGE FORMAT
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_PC_SLPM_REQUEST` = 0x3003     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:8 | **EVENT_ID**                                                 |
+ *  +   +-------+--------------------------------------------------------------+
+ *  |   |   7:0 | **EVENT_ARGC** - number of data arguments                    |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **EVENT_DATA1**                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|  31:0 | ...                                                          |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |2+n|  31:0 | **EVENT_DATAn**                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST		0x3003
+
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN \
+				(GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS		9
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_MAX_LEN \
+		(HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \
+			HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID		(0xff << 8)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC	(0xff << 0)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N	GUC_HXG_REQUEST_MSG_n_DATAn
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
new file mode 100644
index 000000000000..3b83f907ece4
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_COMMUNICATION_CTB_ABI_H
+#define _ABI_GUC_COMMUNICATION_CTB_ABI_H
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+#include "guc_messages_abi.h"
+
+/**
+ * DOC: CT Buffer
+ *
+ * Circular buffer used to send `CTB Message`_
+ */
+
+/**
+ * DOC: CTB Descriptor
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |  31:0 | **HEAD** - offset (in dwords) to the last dword that was     |
+ *  |   |       | read from the `CT Buffer`_.                                  |
+ *  |   |       | It can only be updated by the receiver.                      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **TAIL** - offset (in dwords) to the last dword that was     |
+ *  |   |       | written to the `CT Buffer`_.                                 |
+ *  |   |       | It can only be updated by the sender.                        |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **STATUS** - status of the CTB                               |
+ *  |   |       |                                                              |
+ *  |   |       |   - _`GUC_CTB_STATUS_NO_ERROR` = 0 (normal operation)        |
+ *  |   |       |   - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large)     |
+ *  |   |       |   - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message)      |
+ *  |   |       |   - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified)      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|       | RESERVED = MBZ                                               |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 15|  31:0 | RESERVED = MBZ                                               |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+struct guc_ct_buffer_desc {
+	u32 head;
+	u32 tail;
+	u32 status;
+#define GUC_CTB_STATUS_NO_ERROR				0
+#define GUC_CTB_STATUS_OVERFLOW				(1 << 0)
+#define GUC_CTB_STATUS_UNDERFLOW			(1 << 1)
+#define GUC_CTB_STATUS_MISMATCH				(1 << 2)
+	u32 reserved[13];
+} __packed;
+static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
+
+/**
+ * DOC: CTB Message
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 | 31:16 | **FENCE** - message identifier                               |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 15:12 | **FORMAT** - format of the CTB message                       |
+ *  |   |       |  - _`GUC_CTB_FORMAT_HXG` = 0 - see `CTB HXG Message`_        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  11:8 | **RESERVED**                                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |   7:0 | **NUM_DWORDS** - length of the CTB message (w/o header)      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | optional (depends on FORMAT)                                 |
+ *  +---+-------+                                                              |
+ *  |...|       |                                                              |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_CTB_HDR_LEN				1u
+#define GUC_CTB_MSG_MIN_LEN			GUC_CTB_HDR_LEN
+#define GUC_CTB_MSG_MAX_LEN			256u
+#define GUC_CTB_MSG_0_FENCE			(0xffff << 16)
+#define GUC_CTB_MSG_0_FORMAT			(0xf << 12)
+#define   GUC_CTB_FORMAT_HXG			0u
+#define GUC_CTB_MSG_0_RESERVED			(0xf << 8)
+#define GUC_CTB_MSG_0_NUM_DWORDS		(0xff << 0)
+
+/**
+ * DOC: CTB HXG Message
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 | 31:16 | FENCE                                                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 15:12 | FORMAT = GUC_CTB_FORMAT_HXG_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  11:8 | RESERVED = MBZ                                               |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |   7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message  |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | [Embedded `HXG Message`_]                                    |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_CTB_HXG_MSG_MIN_LEN		(GUC_CTB_MSG_MIN_LEN + GUC_HXG_MSG_MIN_LEN)
+#define GUC_CTB_HXG_MSG_MAX_LEN		GUC_CTB_MSG_MAX_LEN
+
+/**
+ * DOC: CTB based communication
+ *
+ * The CTB (command transport buffer) communication between Host and GuC
+ * is based on u32 data stream written to the shared buffer. One buffer can
+ * be used to transmit data only in one direction (one-directional channel).
+ *
+ * Current status of the each buffer is maintained in the `CTB Descriptor`_.
+ * Each message in data stream is encoded as `CTB HXG Message`_.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h
new file mode 100644
index 000000000000..ef538e34f894
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_COMMUNICATION_MMIO_ABI_H
+#define _ABI_GUC_COMMUNICATION_MMIO_ABI_H
+
+/**
+ * DOC: GuC MMIO based communication
+ *
+ * The MMIO based communication between Host and GuC relies on special
+ * hardware registers which format could be defined by the software
+ * (so called scratch registers).
+ *
+ * Each MMIO based message, both Host to GuC (H2G) and GuC to Host (G2H)
+ * messages, which maximum length depends on number of available scratch
+ * registers, is directly written into those scratch registers.
+ *
+ * For Gen9+, there are 16 software scratch registers 0xC180-0xC1B8,
+ * but no H2G command takes more than 4 parameters and the GuC firmware
+ * itself uses an 4-element array to store the H2G message.
+ *
+ * For Gen11+, there are additional 4 registers 0x190240-0x19024C, which
+ * are, regardless on lower count, preferred over legacy ones.
+ *
+ * The MMIO based communication is mainly used during driver initialization
+ * phase to setup the `CTB based communication`_ that will be used afterwards.
+ */
+
+#define GUC_MAX_MMIO_MSG_LEN		4
+
+/**
+ * DOC: MMIO HXG Message
+ *
+ * Format of the MMIO messages follows definitions of `HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | [Embedded `HXG Message`_]                                    |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h
new file mode 100644
index 000000000000..ec83551bf9c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_ERRORS_ABI_H
+#define _ABI_GUC_ERRORS_ABI_H
+
+enum xe_guc_response_status {
+	XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
+	XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
+};
+
+enum xe_guc_load_status {
+	XE_GUC_LOAD_STATUS_DEFAULT                          = 0x00,
+	XE_GUC_LOAD_STATUS_START                            = 0x01,
+	XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH       = 0x02,
+	XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH       = 0x03,
+	XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE      = 0x04,
+	XE_GUC_LOAD_STATUS_GDT_DONE                         = 0x10,
+	XE_GUC_LOAD_STATUS_IDT_DONE                         = 0x20,
+	XE_GUC_LOAD_STATUS_LAPIC_DONE                       = 0x30,
+	XE_GUC_LOAD_STATUS_GUCINT_DONE                      = 0x40,
+	XE_GUC_LOAD_STATUS_DPC_READY                        = 0x50,
+	XE_GUC_LOAD_STATUS_DPC_ERROR                        = 0x60,
+	XE_GUC_LOAD_STATUS_EXCEPTION                        = 0x70,
+	XE_GUC_LOAD_STATUS_INIT_DATA_INVALID                = 0x71,
+	XE_GUC_LOAD_STATUS_PXP_TEARDOWN_CTRL_ENABLED        = 0x72,
+	XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START,
+	XE_GUC_LOAD_STATUS_MPU_DATA_INVALID                 = 0x73,
+	XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID   = 0x74,
+	XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END,
+
+	XE_GUC_LOAD_STATUS_READY                            = 0xF0,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
new file mode 100644
index 000000000000..47094b9b044c
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -0,0 +1,322 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_KLVS_ABI_H
+#define _ABI_GUC_KLVS_ABI_H
+
+#include <linux/types.h>
+
+/**
+ * DOC: GuC KLV
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 | 31:16 | **KEY** - KLV key identifier                                 |
+ *  |   |       |   - `GuC Self Config KLVs`_                                  |
+ *  |   |       |   - `GuC VGT Policy KLVs`_                                   |
+ *  |   |       |   - `GuC VF Configuration KLVs`_                             |
+ *  |   |       |                                                              |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **LEN** - length of VALUE (in 32bit dwords)                  |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **VALUE** - actual value of the KLV (format depends on KEY)  |
+ *  +---+-------+                                                              |
+ *  |...|       |                                                              |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_KLV_LEN_MIN				1u
+#define GUC_KLV_0_KEY				(0xffff << 16)
+#define GUC_KLV_0_LEN				(0xffff << 0)
+#define GUC_KLV_n_VALUE				(0xffffffff << 0)
+
+/**
+ * DOC: GuC Self Config KLVs
+ *
+ * `GuC KLV`_ keys available for use with HOST2GUC_SELF_CFG_.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR` : 0x0900
+ *      Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ *      status vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR` : 0x0901
+ *      Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ *      source vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_ADDR` : 0x0902
+ *      Refers to 64 bit Global Gfx address of H2G `CT Buffer`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR : 0x0903
+ *      Refers to 64 bit Global Gfx address of H2G `CTB Descriptor`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_SIZE : 0x0904
+ *      Refers to size of H2G `CT Buffer`_ in bytes.
+ *      Should be a multiple of 4K.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_ADDR : 0x0905
+ *      Refers to 64 bit Global Gfx address of G2H `CT Buffer`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR : 0x0906
+ *      Refers to 64 bit Global Gfx address of G2H `CTB Descriptor`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _GUC_KLV_SELF_CFG_G2H_CTB_SIZE : 0x0907
+ *      Refers to size of G2H `CT Buffer`_ in bytes.
+ *      Should be a multiple of 4K.
+ */
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_KEY		0x0900
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY		0x0901
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY		0x0902
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY	0x0903
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY		0x0904
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_LEN		1u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY		0x0905
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY	0x0906
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY		0x0907
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN		1u
+
+/*
+ * Per context scheduling policy update keys.
+ */
+enum  {
+	GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM			= 0x2001,
+	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT			= 0x2002,
+	GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY			= 0x2003,
+	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY	= 0x2004,
+	GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY			= 0x2005,
+
+	GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5,
+};
+
+/**
+ * DOC: GuC VGT Policy KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY.
+ *
+ * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001
+ *      This config sets whether strict scheduling is enabled whereby any VF
+ *      that doesn’t have work to submit is still allocated a fixed execution
+ *      time-slice to ensure active VFs execution is always consitent even
+ *      during other VF reprovisiong / rebooting events. Changing this KLV
+ *      impacts all VFs and takes effect on the next VF-Switch event.
+ *
+ *      :0: don't schedule idle (default)
+ *      :1: schedule if idle
+ *
+ * _`GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD` : 0x8002
+ *      This config sets the sample period for tracking adverse event counters.
+ *       A sample period is the period in millisecs during which events are counted.
+ *       This is applicable for all the VFs.
+ *
+ *      :0: adverse events are not counted (default)
+ *      :n: sample period in milliseconds
+ *
+ * _`GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH` : 0x8D00
+ *      This enum is to reset utilized HW engine after VF Switch (i.e to clean
+ *      up Stale HW register left behind by previous VF)
+ *
+ *      :0: don't reset (default)
+ *      :1: reset
+ */
+
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY		0x8001
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_LEN		1u
+
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY	0x8002
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_LEN	1u
+
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY	0x8D00
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_LEN	1u
+
+/**
+ * DOC: GuC VF Configuration KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VF_CFG.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_START` : 0x0001
+ *      A 4K aligned start GTT address/offset assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_SIZE` : 0x0002
+ *      A 4K aligned size of GGTT assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_LMEM_SIZE` : 0x0003
+ *      A 2M aligned size of local memory assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_CONTEXTS` : 0x0004
+ *      Refers to the number of contexts allocated to this VF.
+ *
+ *      :0: no contexts (default)
+ *      :1-65535: number of contexts (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_TILE_MASK` : 0x0005
+ *      For multi-tiled products, this field contains the bitwise-OR of tiles
+ *      assigned to the VF. Bit-0-set means VF has access to Tile-0,
+ *      Bit-31-set means VF has access to Tile-31, and etc.
+ *      At least one tile will always be allocated.
+ *      If all bits are zero, VF KMD should treat this as a fatal error.
+ *      For, single-tile products this KLV config is ignored.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_DOORBELLS` : 0x0006
+ *      Refers to the number of doorbells allocated to this VF.
+ *
+ *      :0: no doorbells (default)
+ *      :1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_EXEC_QUANTUM` : 0x8A01
+ *      This config sets the VFs-execution-quantum in milliseconds.
+ *      GUC will attempt to obey the maximum values as much as HW is capable
+ *      of and this will never be perfectly-exact (accumulated nano-second
+ *      granularity) since the GPUs clock time runs off a different crystal
+ *      from the CPUs clock. Changing this KLV on a VF that is currently
+ *      running a context wont take effect until a new context is scheduled in.
+ *      That said, when the PF is changing this value from 0xFFFFFFFF to
+ *      something else, it might never take effect if the VF is running an
+ *      inifinitely long compute or shader kernel. In such a scenario, the
+ *      PF would need to trigger a VM PAUSE and then change the KLV to force
+ *      it to take effect. Such cases might typically happen on a 1PF+1VF
+ *      Virtualization config enabled for heavier workloads like AI/ML.
+ *
+ *      :0: infinite exec quantum (default)
+ *
+ * _`GUC_KLV_VF_CFG_PREEMPT_TIMEOUT` : 0x8A02
+ *      This config sets the VF-preemption-timeout in microseconds.
+ *      GUC will attempt to obey the minimum and maximum values as much as
+ *      HW is capable and this will never be perfectly-exact (accumulated
+ *      nano-second granularity) since the GPUs clock time runs off a
+ *      different crystal from the CPUs clock. Changing this KLV on a VF
+ *      that is currently running a context wont take effect until a new
+ *      context is scheduled in.
+ *      That said, when the PF is changing this value from 0xFFFFFFFF to
+ *      something else, it might never take effect if the VF is running an
+ *      inifinitely long compute or shader kernel.
+ *      In this case, the PF would need to trigger a VM PAUSE and then change
+ *      the KLV to force it to take effect. Such cases might typically happen
+ *      on a 1PF+1VF Virtualization config enabled for heavier workloads like
+ *      AI/ML.
+ *
+ *      :0: no preemption timeout (default)
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR` : 0x8A03
+ *      This config sets threshold for CAT errors caused by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET` : 0x8A04
+ *      This config sets threshold for engine reset caused by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT` : 0x8A05
+ *      This config sets threshold for page fault errors caused by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM` : 0x8A06
+ *      This config sets threshold for H2G interrupts triggered by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM` : 0x8A07
+ *      This config sets threshold for GT interrupts triggered by the VF's
+ *      workloads.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM` : 0x8A08
+ *      This config sets threshold for doorbell's ring triggered by the VF.
+ *
+ *      :0: adverse events or error will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID` : 0x8A0A
+ *      Refers to the start index of doorbell assigned to this VF.
+ *
+ *      :0: (default)
+ *      :1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID` : 0x8A0B
+ *      Refers to the start index in context array allocated to this VF’s use.
+ *
+ *      :0: (default)
+ *      :1-65535: number of contexts (Gen12)
+ */
+
+#define GUC_KLV_VF_CFG_GGTT_START_KEY		0x0001
+#define GUC_KLV_VF_CFG_GGTT_START_LEN		2u
+
+#define GUC_KLV_VF_CFG_GGTT_SIZE_KEY		0x0002
+#define GUC_KLV_VF_CFG_GGTT_SIZE_LEN		2u
+
+#define GUC_KLV_VF_CFG_LMEM_SIZE_KEY		0x0003
+#define GUC_KLV_VF_CFG_LMEM_SIZE_LEN		2u
+
+#define GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY		0x0004
+#define GUC_KLV_VF_CFG_NUM_CONTEXTS_LEN		1u
+
+#define GUC_KLV_VF_CFG_TILE_MASK_KEY		0x0005
+#define GUC_KLV_VF_CFG_TILE_MASK_LEN		1u
+
+#define GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY	0x0006
+#define GUC_KLV_VF_CFG_NUM_DOORBELLS_LEN	1u
+
+#define GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY		0x8a01
+#define GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN		1u
+
+#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY	0x8a02
+#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN	1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_KEY		0x8a03
+#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_KEY	0x8a04
+#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_LEN	1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_KEY		0x8a05
+#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_KEY		0x8a06
+#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_KEY		0x8a07
+#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_LEN		1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_KEY	0x8a08
+#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_LEN	1u
+
+#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_KEY	0x8a0a
+#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_LEN	1u
+
+#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY	0x8a0b
+#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN	1u
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
new file mode 100644
index 000000000000..3d199016cf88
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_MESSAGES_ABI_H
+#define _ABI_GUC_MESSAGES_ABI_H
+
+/**
+ * DOC: HXG Message
+ *
+ * All messages exchanged with GuC are defined using 32 bit dwords.
+ * First dword is treated as a message header. Remaining dwords are optional.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  |   |       |                                                              |
+ *  | 0 |    31 | **ORIGIN** - originator of the message                       |
+ *  |   |       |   - _`GUC_HXG_ORIGIN_HOST` = 0                               |
+ *  |   |       |   - _`GUC_HXG_ORIGIN_GUC` = 1                                |
+ *  |   |       |                                                              |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | **TYPE** - message type                                      |
+ *  |   |       |   - _`GUC_HXG_TYPE_REQUEST` = 0                              |
+ *  |   |       |   - _`GUC_HXG_TYPE_EVENT` = 1                                |
+ *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3                     |
+ *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5                    |
+ *  |   |       |   - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6                     |
+ *  |   |       |   - _`GUC_HXG_TYPE_RESPONSE_SUCCESS` = 7                     |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **AUX** - auxiliary data (depends on TYPE)                   |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **PAYLOAD** - optional payload (depends on TYPE)             |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_MSG_MIN_LEN			1u
+#define GUC_HXG_MSG_0_ORIGIN			(0x1 << 31)
+#define   GUC_HXG_ORIGIN_HOST			0u
+#define   GUC_HXG_ORIGIN_GUC			1u
+#define GUC_HXG_MSG_0_TYPE			(0x7 << 28)
+#define   GUC_HXG_TYPE_REQUEST			0u
+#define   GUC_HXG_TYPE_EVENT			1u
+#define   GUC_HXG_TYPE_NO_RESPONSE_BUSY		3u
+#define   GUC_HXG_TYPE_NO_RESPONSE_RETRY	5u
+#define   GUC_HXG_TYPE_RESPONSE_FAILURE		6u
+#define   GUC_HXG_TYPE_RESPONSE_SUCCESS		7u
+#define GUC_HXG_MSG_0_AUX			(0xfffffff << 0)
+#define GUC_HXG_MSG_n_PAYLOAD			(0xffffffff << 0)
+
+/**
+ * DOC: HXG Request
+ *
+ * The `HXG Request`_ message should be used to initiate synchronous activity
+ * for which confirmation or return data is expected.
+ *
+ * The recipient of this message shall use `HXG Response`_, `HXG Failure`_
+ * or `HXG Retry`_ message as a definite reply, and may use `HXG Busy`_
+ * message as a intermediate reply.
+ *
+ * Format of @DATA0 and all @DATAn fields depends on the @ACTION code.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **DATA0** - request data (depends on ACTION)                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **ACTION** - requested action code                           |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **DATAn** - optional data (depends on ACTION)                |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_REQUEST_MSG_MIN_LEN		GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_REQUEST_MSG_0_DATA0		(0xfff << 16)
+#define GUC_HXG_REQUEST_MSG_0_ACTION		(0xffff << 0)
+#define GUC_HXG_REQUEST_MSG_n_DATAn		GUC_HXG_MSG_n_PAYLOAD
+
+/**
+ * DOC: HXG Event
+ *
+ * The `HXG Event`_ message should be used to initiate asynchronous activity
+ * that does not involves immediate confirmation nor data.
+ *
+ * Format of @DATA0 and all @DATAn fields depends on the @ACTION code.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_                                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **DATA0** - event data (depends on ACTION)                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **ACTION** - event action code                               |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **DATAn** - optional event  data (depends on ACTION)         |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_EVENT_MSG_MIN_LEN		GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_EVENT_MSG_0_DATA0		(0xfff << 16)
+#define GUC_HXG_EVENT_MSG_0_ACTION		(0xffff << 0)
+#define GUC_HXG_EVENT_MSG_n_DATAn		GUC_HXG_MSG_n_PAYLOAD
+
+/**
+ * DOC: HXG Busy
+ *
+ * The `HXG Busy`_ message may be used to acknowledge reception of the `HXG Request`_
+ * message if the recipient expects that it processing will be longer than default
+ * timeout.
+ *
+ * The @COUNTER field may be used as a progress indicator.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_BUSY_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **COUNTER** - progress indicator                             |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_BUSY_MSG_LEN			GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_BUSY_MSG_0_COUNTER		GUC_HXG_MSG_0_AUX
+
+/**
+ * DOC: HXG Retry
+ *
+ * The `HXG Retry`_ message should be used by recipient to indicate that the
+ * `HXG Request`_ message was dropped and it should be resent again.
+ *
+ * The @REASON field may be used to provide additional information.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_RETRY_                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **REASON** - reason for retry                                |
+ *  |   |       |  - _`GUC_HXG_RETRY_REASON_UNSPECIFIED` = 0                   |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_RETRY_MSG_LEN			GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_RETRY_MSG_0_REASON		GUC_HXG_MSG_0_AUX
+#define   GUC_HXG_RETRY_REASON_UNSPECIFIED	0u
+
+/**
+ * DOC: HXG Failure
+ *
+ * The `HXG Failure`_ message shall be used as a reply to the `HXG Request`_
+ * message that could not be processed due to an error.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_FAILURE_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **HINT** - additional error hint                             |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **ERROR** - error/result code                                |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_FAILURE_MSG_LEN			GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_FAILURE_MSG_0_HINT		(0xfff << 16)
+#define GUC_HXG_FAILURE_MSG_0_ERROR		(0xffff << 0)
+
+/**
+ * DOC: HXG Response
+ *
+ * The `HXG Response`_ message shall be used as a reply to the `HXG Request`_
+ * message that was successfully processed without an error.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN                                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **DATA0** - data (depends on ACTION from `HXG Request`_)     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 |                                                              |
+ *  +---+-------+                                                              |
+ *  |...|       | **DATAn** - data (depends on ACTION from `HXG Request`_)     |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_RESPONSE_MSG_MIN_LEN		GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_RESPONSE_MSG_0_DATA0		GUC_HXG_MSG_0_AUX
+#define GUC_HXG_RESPONSE_MSG_n_DATAn		GUC_HXG_MSG_n_PAYLOAD
+
+/* deprecated */
+#define INTEL_GUC_MSG_TYPE_SHIFT	28
+#define INTEL_GUC_MSG_TYPE_MASK		(0xF << INTEL_GUC_MSG_TYPE_SHIFT)
+#define INTEL_GUC_MSG_DATA_SHIFT	16
+#define INTEL_GUC_MSG_DATA_MASK		(0xFFF << INTEL_GUC_MSG_DATA_SHIFT)
+#define INTEL_GUC_MSG_CODE_SHIFT	0
+#define INTEL_GUC_MSG_CODE_MASK		(0xFFFF << INTEL_GUC_MSG_CODE_SHIFT)
+
+enum intel_guc_msg_type {
+	INTEL_GUC_MSG_TYPE_REQUEST = 0x0,
+	INTEL_GUC_MSG_TYPE_RESPONSE = 0xF,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h
new file mode 100644
index 000000000000..710cecca972d
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h
@@ -0,0 +1 @@
+/* Empty */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h
new file mode 100644
index 000000000000..650ea2803a97
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _I915_GEM_MMAN_H_
+#define _I915_GEM_MMAN_H_
+
+#include "xe_bo_types.h"
+#include <drm/drm_prime.h>
+
+static inline int i915_gem_fb_mmap(struct xe_bo *bo, struct vm_area_struct *vma)
+{
+	return drm_gem_prime_mmap(&bo->ttm.base, vma);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
new file mode 100644
index 000000000000..5f19550cc845
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _I915_GEM_OBJECT_H_
+#define _I915_GEM_OBJECT_H_
+
+#include <linux/types.h>
+
+#include "xe_bo.h"
+
+#define i915_gem_object_is_shmem(obj) ((obj)->flags & XE_BO_CREATE_SYSTEM_BIT)
+
+static inline dma_addr_t i915_gem_object_get_dma_address(const struct xe_bo *bo, pgoff_t n)
+{
+	/* Should never be called */
+	WARN_ON(1);
+	return n;
+}
+
+static inline bool i915_gem_object_is_tiled(const struct xe_bo *bo)
+{
+	/* legacy tiling is unused */
+	return false;
+}
+
+static inline bool i915_gem_object_is_userptr(const struct xe_bo *bo)
+{
+	/* legacy tiling is unused */
+	return false;
+}
+
+static inline int i915_gem_object_read_from_page(struct xe_bo *bo,
+					  u32 ofs, u64 *ptr, u32 size)
+{
+	struct ttm_bo_kmap_obj map;
+	void *virtual;
+	bool is_iomem;
+	int ret;
+
+	XE_WARN_ON(size != 8);
+
+	ret = xe_bo_lock(bo, true);
+	if (ret)
+		return ret;
+
+	ret = ttm_bo_kmap(&bo->ttm, ofs >> PAGE_SHIFT, 1, &map);
+	if (ret)
+		goto out_unlock;
+
+	ofs &= ~PAGE_MASK;
+	virtual = ttm_kmap_obj_virtual(&map, &is_iomem);
+	if (is_iomem)
+		*ptr = readq((void __iomem *)(virtual + ofs));
+	else
+		*ptr = *(u64 *)(virtual + ofs);
+
+	ttm_bo_kunmap(&map);
+out_unlock:
+	xe_bo_unlock(bo);
+	return ret;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h
new file mode 100644
index 000000000000..2a3f12d2978c
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _I915_GEM_OBJECT_FRONTBUFFER_H_
+#define _I915_GEM_OBJECT_FRONTBUFFER_H_
+
+#define i915_gem_object_get_frontbuffer(obj)		NULL
+#define i915_gem_object_set_frontbuffer(obj, front)	(front)
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h b/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h
new file mode 100644
index 000000000000..21fec9cc837c
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_RPS_H__
+#define __INTEL_RPS_H__
+
+#define gen5_rps_irq_handler(x) ({})
+
+#endif /* __INTEL_RPS_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h
new file mode 100644
index 000000000000..6f0ab3753563
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_H_
+#define _I915_ACTIVE_H_
+
+#include "i915_active_types.h"
+
+static inline void i915_active_init(struct i915_active *ref,
+				    int (*active)(struct i915_active *ref),
+				    void (*retire)(struct i915_active *ref),
+				    unsigned long flags)
+{
+	(void) active;
+	(void) retire;
+}
+
+#define i915_active_fini(active) do { } while (0)
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h
new file mode 100644
index 000000000000..8c31f9a8b168
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_TYPES_H_
+#define _I915_ACTIVE_TYPES_H_
+
+struct i915_active {};
+#define I915_ACTIVE_RETIRE_SLEEPS 0
+
+#endif /* _I915_ACTIVE_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h
new file mode 100644
index 000000000000..e835bea08d1b
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __I915_CONFIG_H__
+#define __I915_CONFIG_H__
+
+#include <linux/sched.h>
+
+struct drm_i915_private;
+
+static inline unsigned long
+i915_fence_timeout(const struct drm_i915_private *i915)
+{
+	return MAX_SCHEDULE_TIMEOUT;
+}
+
+#endif /* __I915_CONFIG_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h
new file mode 100644
index 000000000000..b4c47617b64b
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __I915_DEBUGFS_H__
+#define __I915_DEBUGFS_H__
+
+struct drm_i915_gem_object;
+struct seq_file;
+
+static inline void i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) {}
+
+#endif /* __I915_DEBUGFS_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
new file mode 100644
index 000000000000..5d2a77b52db4
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -0,0 +1,233 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+#ifndef _XE_I915_DRV_H_
+#define _XE_I915_DRV_H_
+
+/*
+ * "Adaptation header" to allow i915 display to also build for xe driver.
+ * TODO: refactor i915 and xe so this can cease to exist
+ */
+
+#include <drm/drm_drv.h>
+
+#include "gem/i915_gem_object.h"
+
+#include "soc/intel_pch.h"
+#include "xe_device.h"
+#include "xe_bo.h"
+#include "xe_pm.h"
+#include "xe_step.h"
+#include "i915_gem.h"
+#include "i915_gem_stolen.h"
+#include "i915_gpu_error.h"
+#include "i915_reg_defs.h"
+#include "i915_utils.h"
+#include "intel_gt_types.h"
+#include "intel_step.h"
+#include "intel_uc_fw.h"
+#include "intel_uncore.h"
+#include "intel_runtime_pm.h"
+#include <linux/pm_runtime.h>
+
+static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
+{
+	return container_of(dev, struct drm_i915_private, drm);
+}
+
+static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
+{
+	return dev_get_drvdata(kdev);
+}
+
+
+#define INTEL_JASPERLAKE 0
+#define INTEL_ELKHARTLAKE 0
+#define IS_PLATFORM(xe, x) ((xe)->info.platform == x)
+#define INTEL_INFO(dev_priv)	(&((dev_priv)->info))
+#define INTEL_DEVID(dev_priv)	((dev_priv)->info.devid)
+#define IS_I830(dev_priv)	(dev_priv && 0)
+#define IS_I845G(dev_priv)	(dev_priv && 0)
+#define IS_I85X(dev_priv)	(dev_priv && 0)
+#define IS_I865G(dev_priv)	(dev_priv && 0)
+#define IS_I915G(dev_priv)	(dev_priv && 0)
+#define IS_I915GM(dev_priv)	(dev_priv && 0)
+#define IS_I945G(dev_priv)	(dev_priv && 0)
+#define IS_I945GM(dev_priv)	(dev_priv && 0)
+#define IS_I965G(dev_priv)	(dev_priv && 0)
+#define IS_I965GM(dev_priv)	(dev_priv && 0)
+#define IS_G45(dev_priv)	(dev_priv && 0)
+#define IS_GM45(dev_priv)	(dev_priv && 0)
+#define IS_G4X(dev_priv)	(dev_priv && 0)
+#define IS_PINEVIEW(dev_priv)	(dev_priv && 0)
+#define IS_G33(dev_priv)	(dev_priv && 0)
+#define IS_IRONLAKE(dev_priv)	(dev_priv && 0)
+#define IS_IRONLAKE_M(dev_priv) (dev_priv && 0)
+#define IS_SANDYBRIDGE(dev_priv)	(dev_priv && 0)
+#define IS_IVYBRIDGE(dev_priv)	(dev_priv && 0)
+#define IS_IVB_GT1(dev_priv)	(dev_priv && 0)
+#define IS_VALLEYVIEW(dev_priv)	(dev_priv && 0)
+#define IS_CHERRYVIEW(dev_priv)	(dev_priv && 0)
+#define IS_HASWELL(dev_priv)	(dev_priv && 0)
+#define IS_BROADWELL(dev_priv)	(dev_priv && 0)
+#define IS_SKYLAKE(dev_priv)	(dev_priv && 0)
+#define IS_BROXTON(dev_priv)	(dev_priv && 0)
+#define IS_KABYLAKE(dev_priv)	(dev_priv && 0)
+#define IS_GEMINILAKE(dev_priv)	(dev_priv && 0)
+#define IS_COFFEELAKE(dev_priv)	(dev_priv && 0)
+#define IS_COMETLAKE(dev_priv)	(dev_priv && 0)
+#define IS_ICELAKE(dev_priv)	(dev_priv && 0)
+#define IS_JASPERLAKE(dev_priv)	(dev_priv && 0)
+#define IS_ELKHARTLAKE(dev_priv)	(dev_priv && 0)
+#define IS_TIGERLAKE(dev_priv)	IS_PLATFORM(dev_priv, XE_TIGERLAKE)
+#define IS_ROCKETLAKE(dev_priv)	IS_PLATFORM(dev_priv, XE_ROCKETLAKE)
+#define IS_DG1(dev_priv)        IS_PLATFORM(dev_priv, XE_DG1)
+#define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S)
+#define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_P)
+#define IS_XEHPSDV(dev_priv) (dev_priv && 0)
+#define IS_DG2(dev_priv)	IS_PLATFORM(dev_priv, XE_DG2)
+#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, XE_PVC)
+#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE)
+#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE)
+
+#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0)
+#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0)
+#define IS_BROADWELL_ULX(dev_priv) (dev_priv && 0)
+
+#define IP_VER(ver, rel)                ((ver) << 8 | (rel))
+
+#define INTEL_DISPLAY_ENABLED(xe) (HAS_DISPLAY((xe)) && !intel_opregion_headless_sku((xe)))
+
+#define IS_GRAPHICS_VER(xe, first, last) \
+	((xe)->info.graphics_verx100 >= first * 100 && \
+	 (xe)->info.graphics_verx100 <= (last*100 + 99))
+#define IS_MOBILE(xe) (xe && 0)
+#define HAS_LLC(xe) (!IS_DGFX((xe)))
+
+#define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270
+
+/* Workarounds not handled yet */
+#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step <= last; })
+#define IS_GRAPHICS_STEP(xe, first, last) ({u8 __step = (xe)->info.step.graphics; first <= __step && __step <= last; })
+
+#define IS_LP(xe) (0)
+#define IS_GEN9_LP(xe) (0)
+#define IS_GEN9_BC(xe) (0)
+
+#define IS_TIGERLAKE_UY(xe) (xe && 0)
+#define IS_COMETLAKE_ULX(xe) (xe && 0)
+#define IS_COFFEELAKE_ULX(xe) (xe && 0)
+#define IS_KABYLAKE_ULX(xe) (xe && 0)
+#define IS_SKYLAKE_ULX(xe) (xe && 0)
+#define IS_HASWELL_ULX(xe) (xe && 0)
+#define IS_COMETLAKE_ULT(xe) (xe && 0)
+#define IS_COFFEELAKE_ULT(xe) (xe && 0)
+#define IS_KABYLAKE_ULT(xe) (xe && 0)
+#define IS_SKYLAKE_ULT(xe) (xe && 0)
+
+#define IS_DG1_GRAPHICS_STEP(xe, first, last) (IS_DG1(xe) && IS_GRAPHICS_STEP(xe, first, last))
+#define IS_DG2_GRAPHICS_STEP(xe, variant, first, last) \
+	((xe)->info.subplatform == XE_SUBPLATFORM_DG2_ ## variant && \
+	 IS_GRAPHICS_STEP(xe, first, last))
+#define IS_XEHPSDV_GRAPHICS_STEP(xe, first, last) (IS_XEHPSDV(xe) && IS_GRAPHICS_STEP(xe, first, last))
+
+/* XXX: No basedie stepping support yet */
+#define IS_PVC_BD_STEP(xe, first, last) (!WARN_ON(1) && IS_PONTEVECCHIO(xe))
+
+#define IS_TIGERLAKE_DISPLAY_STEP(xe, first, last) (IS_TIGERLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_ROCKETLAKE_DISPLAY_STEP(xe, first, last) (IS_ROCKETLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_DG1_DISPLAY_STEP(xe, first, last) (IS_DG1(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_DG2_DISPLAY_STEP(xe, first, last) (IS_DG2(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_ADLP_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_P(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_ADLS_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_S(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_JSL_EHL_DISPLAY_STEP(xe, first, last) (IS_JSL_EHL(xe) && IS_DISPLAY_STEP(xe, first, last))
+#define IS_MTL_DISPLAY_STEP(xe, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+
+/* FIXME: Add subplatform here */
+#define IS_MTL_GRAPHICS_STEP(xe, sub, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
+
+#define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10)
+#define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11)
+#define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12)
+#define IS_RAPTORLAKE_U(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU)
+#define IS_ICL_WITH_PORT_F(xe) (xe && 0)
+#define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe))
+#define to_intel_bo(x) gem_to_xe_bo((x))
+#define mkwrite_device_info(xe) (INTEL_INFO(xe))
+
+#define HAS_128_BYTE_Y_TILING(xe) (xe || 1)
+
+#define intel_has_gpu_reset(a) (a && 0)
+
+#include "intel_wakeref.h"
+
+static inline bool intel_runtime_pm_get(struct xe_runtime_pm *pm)
+{
+	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+	if (xe_pm_runtime_get(xe) < 0) {
+		xe_pm_runtime_put(xe);
+		return false;
+	}
+	return true;
+}
+
+static inline bool intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm)
+{
+	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+	return xe_pm_runtime_get_if_active(xe);
+}
+
+static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm)
+{
+	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+	xe_pm_runtime_put(xe);
+}
+
+static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, bool wakeref)
+{
+	if (wakeref)
+		intel_runtime_pm_put_unchecked(pm);
+}
+
+#define intel_runtime_pm_get_raw intel_runtime_pm_get
+#define intel_runtime_pm_put_raw intel_runtime_pm_put
+#define assert_rpm_wakelock_held(x) do { } while (0)
+#define assert_rpm_raw_wakeref_held(x) do { } while (0)
+
+#define intel_uncore_forcewake_get(x, y) do { } while (0)
+#define intel_uncore_forcewake_put(x, y) do { } while (0)
+
+#define intel_uncore_arm_unclaimed_mmio_detection(x) do { } while (0)
+
+#define I915_PRIORITY_DISPLAY 0
+struct i915_sched_attr {
+	int priority;
+};
+#define i915_gem_fence_wait_priority(fence, attr) do { (void) attr; } while (0)
+
+#define with_intel_runtime_pm(rpm, wf) \
+	for ((wf) = intel_runtime_pm_get(rpm); (wf); \
+	     intel_runtime_pm_put((rpm), (wf)), (wf) = 0)
+
+#define pdev_to_i915 pdev_to_xe_device
+#define RUNTIME_INFO(xe)		(&(xe)->info.i915_runtime)
+
+#define FORCEWAKE_ALL XE_FORCEWAKE_ALL
+#define HPD_STORM_DEFAULT_THRESHOLD 50
+
+#ifdef CONFIG_ARM64
+/*
+ * arm64 indirectly includes linux/rtc.h,
+ * which defines a irq_lock, so include it
+ * here before #define-ing it
+ */
+#include <linux/rtc.h>
+#endif
+
+#define irq_lock irq.lock
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h
new file mode 100644
index 000000000000..12c671fd5235
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_fixed.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h
new file mode 100644
index 000000000000..06b723a479c5
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __I915_GEM_H__
+#define __I915_GEM_H__
+#define GEM_BUG_ON
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
new file mode 100644
index 000000000000..888e7a87a925
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
@@ -0,0 +1,79 @@
+#ifndef _I915_GEM_STOLEN_H_
+#define _I915_GEM_STOLEN_H_
+
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_res_cursor.h"
+
+struct xe_bo;
+
+struct i915_stolen_fb {
+	struct xe_bo *bo;
+};
+
+static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
+						       struct i915_stolen_fb *fb,
+						       u32 size, u32 align,
+						       u32 start, u32 end)
+{
+	struct xe_bo *bo;
+	int err;
+	u32 flags = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_STOLEN_BIT;
+
+	bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
+				       NULL, size, start, end,
+				       ttm_bo_type_kernel, flags);
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		bo = NULL;
+		return err;
+	}
+	err = xe_bo_pin(bo);
+	xe_bo_unlock_vm_held(bo);
+
+	if (err) {
+		xe_bo_put(fb->bo);
+		bo = NULL;
+	}
+
+	fb->bo = bo;
+
+	return err;
+}
+
+static inline int i915_gem_stolen_insert_node(struct xe_device *xe,
+					      struct i915_stolen_fb *fb,
+					      u32 size, u32 align)
+{
+	/* Not used on xe */
+	BUG_ON(1);
+	return -ENODEV;
+}
+
+static inline void i915_gem_stolen_remove_node(struct xe_device *xe,
+					       struct i915_stolen_fb *fb)
+{
+	xe_bo_unpin_map_no_vm(fb->bo);
+	fb->bo = NULL;
+}
+
+#define i915_gem_stolen_initialized(xe) (!!ttm_manager_type(&(xe)->ttm, XE_PL_STOLEN))
+#define i915_gem_stolen_node_allocated(fb) (!!((fb)->bo))
+
+static inline u32 i915_gem_stolen_node_offset(struct i915_stolen_fb *fb)
+{
+	struct xe_res_cursor res;
+
+	xe_res_first(fb->bo->ttm.resource, 0, 4096, &res);
+	return res.start;
+}
+
+/* Used for < gen4. These are not supported by Xe */
+#define i915_gem_stolen_area_address(xe) (!WARN_ON(1))
+/* Used for gen9 specific WA. Gen9 is not supported by Xe */
+#define i915_gem_stolen_area_size(xe) (!WARN_ON(1))
+
+#define i915_gem_stolen_node_address(xe, fb) (xe_ttm_stolen_gpu_offset(xe) + \
+					 i915_gem_stolen_node_offset(fb))
+#define i915_gem_stolen_node_size(fb) ((u64)((fb)->bo->ttm.base.size))
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h
new file mode 100644
index 000000000000..98e9dd78f670
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _I915_GPU_ERROR_H_
+#define _I915_GPU_ERROR_H_
+
+struct drm_i915_error_state_buf;
+
+__printf(2, 3)
+static inline void
+i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
+{
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h
new file mode 100644
index 000000000000..61707a07f91f
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_irq.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h
new file mode 100644
index 000000000000..8619ec015ad4
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_reg.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h
new file mode 100644
index 000000000000..723279c975b1
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_reg_defs.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h
new file mode 100644
index 000000000000..d429d421ac70
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#define trace_i915_reg_rw(a...) do { } while (0)
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h
new file mode 100644
index 000000000000..1d7c4360e5c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/i915_utils.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h
new file mode 100644
index 000000000000..80b024d435dc
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _I915_VGPU_H_
+#define _I915_VGPU_H_
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct i915_ggtt;
+
+static inline void intel_vgpu_detect(struct drm_i915_private *i915)
+{
+}
+static inline bool intel_vgpu_active(struct drm_i915_private *i915)
+{
+	return false;
+}
+static inline void intel_vgpu_register(struct drm_i915_private *i915)
+{
+}
+static inline bool intel_vgpu_has_full_ppgtt(struct drm_i915_private *i915)
+{
+	return false;
+}
+static inline bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *i915)
+{
+	return false;
+}
+static inline bool intel_vgpu_has_huge_gtt(struct drm_i915_private *i915)
+{
+	return false;
+}
+static inline int intel_vgt_balloon(struct i915_ggtt *ggtt)
+{
+	return 0;
+}
+static inline void intel_vgt_deballoon(struct i915_ggtt *ggtt)
+{
+}
+
+#endif /* _I915_VGPU_H_ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
new file mode 100644
index 000000000000..a20d2638ea7a
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef I915_VMA_H
+#define I915_VMA_H
+
+#include <uapi/drm/i915_drm.h>
+#include <drm/drm_mm.h>
+
+/* We don't want these from i915_drm.h in case of Xe */
+#undef I915_TILING_X
+#undef I915_TILING_Y
+#define I915_TILING_X 0
+#define I915_TILING_Y 0
+
+struct xe_bo;
+
+struct i915_vma {
+	struct xe_bo *bo, *dpt;
+	struct drm_mm_node node;
+};
+
+#define i915_ggtt_clear_scanout(bo) do { } while (0)
+
+#define i915_vma_fence_id(vma) -1
+
+static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
+{
+	return vma->node.start;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h
new file mode 100644
index 000000000000..e7aaf50f5485
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+/* XX: Figure out how to handle this vma mapping in xe */
+struct intel_remapped_plane_info {
+	/* in gtt pages */
+	u32 offset:31;
+	u32 linear:1;
+	union {
+		/* in gtt pages for !linear */
+		struct {
+			u16 width;
+			u16 height;
+			u16 src_stride;
+			u16 dst_stride;
+		};
+
+		/* in gtt pages for linear */
+		u32 size;
+	};
+} __packed;
+
+struct intel_remapped_info {
+	struct intel_remapped_plane_info plane[4];
+	/* in gtt pages */
+	u32 plane_alignment;
+} __packed;
+
+struct intel_rotation_info {
+	struct intel_remapped_plane_info plane[2];
+} __packed;
+
+enum i915_gtt_view_type {
+	I915_GTT_VIEW_NORMAL = 0,
+	I915_GTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
+	I915_GTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
+};
+
+static inline void assert_i915_gem_gtt_types(void)
+{
+	BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 2 * sizeof(u32) + 8 * sizeof(u16));
+	BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 5 * sizeof(u32) + 16 * sizeof(u16));
+
+	/* Check that rotation/remapped shares offsets for simplicity */
+	BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) !=
+		     offsetof(struct intel_rotation_info, plane[0]));
+	BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) !=
+		     offsetofend(struct intel_rotation_info, plane[1]));
+
+	/* As we encode the size of each branch inside the union into its type,
+	 * we have to be careful that each branch has a unique size.
+	 */
+	switch ((enum i915_gtt_view_type)0) {
+	case I915_GTT_VIEW_NORMAL:
+	case I915_GTT_VIEW_ROTATED:
+	case I915_GTT_VIEW_REMAPPED:
+		/* gcc complains if these are identical cases */
+		break;
+	}
+}
+
+struct i915_gtt_view {
+	enum i915_gtt_view_type type;
+	union {
+		/* Members need to contain no holes/padding */
+		struct intel_rotation_info rotated;
+		struct intel_remapped_info remapped;
+	};
+};
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h
new file mode 100644
index 000000000000..ce986f0e8f38
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/intel_clock_gating.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h
new file mode 100644
index 000000000000..c15806d6c4f7
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_TYPES__
+#define __INTEL_GT_TYPES__
+
+#define intel_gt_support_legacy_fencing(gt) 0
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h
new file mode 100644
index 000000000000..55b316985340
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/intel_mchbar_regs.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h
new file mode 100644
index 000000000000..8c15867fd613
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/intel_pci_config.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h
new file mode 100644
index 000000000000..0c47661bdc6a
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_PCODE_H__
+#define __INTEL_PCODE_H__
+
+#include "intel_uncore.h"
+#include "xe_pcode.h"
+
+static inline int
+snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val,
+			int fast_timeout_us, int slow_timeout_ms)
+{
+	return xe_pcode_write_timeout(__compat_uncore_to_gt(uncore), mbox, val,
+				      slow_timeout_ms ?: 1);
+}
+
+static inline int
+snb_pcode_write(struct intel_uncore *uncore, u32 mbox, u32 val)
+{
+
+	return xe_pcode_write(__compat_uncore_to_gt(uncore), mbox, val);
+}
+
+static inline int
+snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1)
+{
+	return xe_pcode_read(__compat_uncore_to_gt(uncore), mbox, val, val1);
+}
+
+static inline int
+skl_pcode_request(struct intel_uncore *uncore, u32 mbox,
+		  u32 request, u32 reply_mask, u32 reply,
+		  int timeout_base_ms)
+{
+	return xe_pcode_request(__compat_uncore_to_gt(uncore), mbox, request, reply_mask, reply,
+				timeout_base_ms);
+}
+
+#endif /* __INTEL_PCODE_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
new file mode 100644
index 000000000000..89da3cc62f39
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_wakeref.h"
+
+#define intel_runtime_pm xe_runtime_pm
+
+static inline void disable_rpm_wakeref_asserts(void *rpm)
+{
+}
+
+static inline void enable_rpm_wakeref_asserts(void *rpm)
+{
+}
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h
new file mode 100644
index 000000000000..0006ef812346
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_STEP_H__
+#define __INTEL_STEP_H__
+
+#include "xe_device_types.h"
+#include "xe_step.h"
+
+#define intel_display_step_name xe_display_step_name
+
+static inline
+const char *xe_display_step_name(struct xe_device *xe)
+{
+	return xe_step_name(xe->info.step.display);
+}
+
+#endif /* __INTEL_STEP_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h
new file mode 100644
index 000000000000..009745328992
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _INTEL_UC_FW_H_
+#define _INTEL_UC_FW_H_
+
+#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git"
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
new file mode 100644
index 000000000000..cd26ddc0f69e
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_UNCORE_H__
+#define __INTEL_UNCORE_H__
+
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_mmio.h"
+
+static inline struct xe_gt *__compat_uncore_to_gt(struct intel_uncore *uncore)
+{
+	struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
+
+	return xe_root_mmio_gt(xe);
+}
+
+static inline u32 intel_uncore_read(struct intel_uncore *uncore,
+				    i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline u32 intel_uncore_read8(struct intel_uncore *uncore,
+				     i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read8(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline u32 intel_uncore_read16(struct intel_uncore *uncore,
+				      i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read16(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline u64
+intel_uncore_read64_2x32(struct intel_uncore *uncore,
+			 i915_reg_t i915_lower_reg, i915_reg_t i915_upper_reg)
+{
+	struct xe_reg lower_reg = XE_REG(i915_mmio_reg_offset(i915_lower_reg));
+	struct xe_reg upper_reg = XE_REG(i915_mmio_reg_offset(i915_upper_reg));
+	u32 upper, lower, old_upper;
+	int loop = 0;
+
+	upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg);
+	do {
+		old_upper = upper;
+		lower = xe_mmio_read32(__compat_uncore_to_gt(uncore), lower_reg);
+		upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg);
+	} while (upper != old_upper && loop++ < 2);
+
+	return (u64)upper << 32 | lower;
+}
+
+static inline void intel_uncore_posting_read(struct intel_uncore *uncore,
+					     i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline void intel_uncore_write(struct intel_uncore *uncore,
+				      i915_reg_t i915_reg, u32 val)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+}
+
+static inline u32 intel_uncore_rmw(struct intel_uncore *uncore,
+				   i915_reg_t i915_reg, u32 clear, u32 set)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_rmw32(__compat_uncore_to_gt(uncore), reg, clear, set);
+}
+
+static inline int intel_wait_for_register(struct intel_uncore *uncore,
+					  i915_reg_t i915_reg, u32 mask,
+					  u32 value, unsigned int timeout)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+			      timeout * USEC_PER_MSEC, NULL, false);
+}
+
+static inline int intel_wait_for_register_fw(struct intel_uncore *uncore,
+					     i915_reg_t i915_reg, u32 mask,
+					     u32 value, unsigned int timeout)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+			      timeout * USEC_PER_MSEC, NULL, false);
+}
+
+static inline int
+__intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg,
+			  u32 mask, u32 value, unsigned int fast_timeout_us,
+			  unsigned int slow_timeout_ms, u32 *out_value)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value,
+			      fast_timeout_us + 1000 * slow_timeout_ms,
+			      out_value, false);
+}
+
+static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore,
+				       i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline void intel_uncore_write_fw(struct intel_uncore *uncore,
+					 i915_reg_t i915_reg, u32 val)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+}
+
+static inline u32 intel_uncore_read_notrace(struct intel_uncore *uncore,
+					    i915_reg_t i915_reg)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
+}
+
+static inline void intel_uncore_write_notrace(struct intel_uncore *uncore,
+					      i915_reg_t i915_reg, u32 val)
+{
+	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
+
+	xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val);
+}
+
+static inline void __iomem *intel_uncore_regs(struct intel_uncore *uncore)
+{
+	struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
+
+	return xe_device_get_root_tile(xe)->mmio.regs;
+}
+
+/*
+ * The raw_reg_{read,write} macros are intended as a micro-optimization for
+ * interrupt handlers so that the pointer indirection on uncore->regs can
+ * be computed once (and presumably cached in a register) instead of generating
+ * extra load instructions for each MMIO access.
+ *
+ * Given that these macros are only intended for non-GSI interrupt registers
+ * (and the goal is to avoid extra instructions generated by the compiler),
+ * these macros do not account for uncore->gsi_offset.  Any caller that needs
+ * to use these macros on a GSI register is responsible for adding the
+ * appropriate GSI offset to the 'base' parameter.
+ */
+#define raw_reg_read(base, reg) \
+	readl(base + i915_mmio_reg_offset(reg))
+#define raw_reg_write(base, reg, value) \
+	writel(value, base + i915_mmio_reg_offset(reg))
+
+#endif /* __INTEL_UNCORE_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
new file mode 100644
index 000000000000..1c5e30cf10ca
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/types.h>
+
+typedef bool intel_wakeref_t;
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h
new file mode 100644
index 000000000000..c2c30ece8f77
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_PXP_H__
+#define __INTEL_PXP_H__
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct intel_pxp;
+
+static inline int intel_pxp_key_check(struct intel_pxp *pxp,
+				      struct drm_i915_gem_object *obj,
+				      bool assign)
+{
+	return -ENODEV;
+}
+
+static inline bool
+i915_gem_object_is_protected(const struct drm_i915_gem_object *obj)
+{
+	return false;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h
new file mode 100644
index 000000000000..65707e20c557
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../../i915/soc/intel_dram.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h
new file mode 100644
index 000000000000..33c5257b3a71
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../../i915/soc/intel_gmch.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h
new file mode 100644
index 000000000000..9c46556d33a4
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../../i915/soc/intel_pch.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h
new file mode 100644
index 000000000000..ec6f12de5727
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2013-2021 Intel Corporation
+ */
+
+#ifndef _VLV_SIDEBAND_H_
+#define _VLV_SIDEBAND_H_
+
+#include <linux/types.h>
+
+#include "vlv_sideband_reg.h"
+
+enum pipe;
+struct drm_i915_private;
+
+enum {
+	VLV_IOSF_SB_BUNIT,
+	VLV_IOSF_SB_CCK,
+	VLV_IOSF_SB_CCU,
+	VLV_IOSF_SB_DPIO,
+	VLV_IOSF_SB_FLISDSI,
+	VLV_IOSF_SB_GPIO,
+	VLV_IOSF_SB_NC,
+	VLV_IOSF_SB_PUNIT,
+};
+
+static inline void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports)
+{
+}
+static inline u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_iosf_sb_write(struct drm_i915_private *i915,
+				     u8 port, u32 reg, u32 val)
+{
+}
+static inline void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports)
+{
+}
+static inline void vlv_bunit_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val)
+{
+}
+static inline void vlv_bunit_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_cck_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val)
+{
+}
+static inline void vlv_cck_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_ccu_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val)
+{
+}
+static inline void vlv_ccu_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_dpio_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_dpio_read(struct drm_i915_private *i915, int pipe, int reg)
+{
+	return 0;
+}
+static inline void vlv_dpio_write(struct drm_i915_private *i915,
+				  int pipe, int reg, u32 val)
+{
+}
+static inline void vlv_dpio_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_flisdsi_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg)
+{
+	return 0;
+}
+static inline void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val)
+{
+}
+static inline void vlv_flisdsi_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_nc_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr)
+{
+	return 0;
+}
+static inline void vlv_nc_put(struct drm_i915_private *i915)
+{
+}
+static inline void vlv_punit_get(struct drm_i915_private *i915)
+{
+}
+static inline u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr)
+{
+	return 0;
+}
+static inline int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val)
+{
+	return 0;
+}
+static inline void vlv_punit_put(struct drm_i915_private *i915)
+{
+}
+
+#endif /* _VLV_SIDEBAND_H_ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h
new file mode 100644
index 000000000000..949f134ce3cf
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "../../i915/vlv_sideband_reg.h"
diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c
new file mode 100644
index 000000000000..bee191a4a97d
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
+#include "intel_uncore.h"
+
+void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr,
+		    i915_reg_t iir, i915_reg_t ier)
+{
+	intel_uncore_write(uncore, imr, 0xffffffff);
+	intel_uncore_posting_read(uncore, imr);
+
+	intel_uncore_write(uncore, ier, 0);
+
+	/* IIR can theoretically queue up two events. Be paranoid. */
+	intel_uncore_write(uncore, iir, 0xffffffff);
+	intel_uncore_posting_read(uncore, iir);
+	intel_uncore_write(uncore, iir, 0xffffffff);
+	intel_uncore_posting_read(uncore, iir);
+}
+
+/*
+ * We should clear IMR at preinstall/uninstall, and just check at postinstall.
+ */
+void gen3_assert_iir_is_zero(struct intel_uncore *uncore, i915_reg_t reg)
+{
+	struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
+	u32 val = intel_uncore_read(uncore, reg);
+
+	if (val == 0)
+		return;
+
+	drm_WARN(&xe->drm, 1,
+		 "Interrupt register 0x%x is not zero: 0x%08x\n",
+		 i915_mmio_reg_offset(reg), val);
+	intel_uncore_write(uncore, reg, 0xffffffff);
+	intel_uncore_posting_read(uncore, reg);
+	intel_uncore_write(uncore, reg, 0xffffffff);
+	intel_uncore_posting_read(uncore, reg);
+}
+
+void gen3_irq_init(struct intel_uncore *uncore,
+		   i915_reg_t imr, u32 imr_val,
+		   i915_reg_t ier, u32 ier_val,
+		   i915_reg_t iir)
+{
+	gen3_assert_iir_is_zero(uncore, iir);
+
+	intel_uncore_write(uncore, ier, ier_val);
+	intel_uncore_write(uncore, imr, imr_val);
+	intel_uncore_posting_read(uncore, imr);
+}
+
+bool intel_irqs_enabled(struct xe_device *xe)
+{
+	/*
+	 * XXX: i915 has a racy handling of the irq.enabled, since it doesn't
+	 * lock its transitions. Because of that, the irq.enabled sometimes
+	 * is not read with the irq.lock in place.
+	 * However, the most critical cases like vblank and page flips are
+	 * properly using the locks.
+	 * We cannot take the lock in here or run any kind of assert because
+	 * of i915 inconsistency.
+	 * But at this point the xe irq is better protected against races,
+	 * although the full solution would be protecting the i915 side.
+	 */
+	return xe->irq.enabled;
+}
+
+void intel_synchronize_irq(struct xe_device *xe)
+{
+	synchronize_irq(to_pci_dev(xe->drm.dev)->irq);
+}
diff --git a/drivers/gpu/drm/xe/display/ext/i915_utils.c b/drivers/gpu/drm/xe/display/ext/i915_utils.c
new file mode 100644
index 000000000000..43b10a2cc508
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/ext/i915_utils.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "i915_drv.h"
+
+bool i915_vtd_active(struct drm_i915_private *i915)
+{
+	if (device_iommu_mapped(i915->drm.dev))
+		return true;
+
+	/* Running as a guest, we assume the host is enforcing VT'd */
+	return i915_run_as_guest();
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
+
+/* i915 specific, just put here for shutting it up */
+int __i915_inject_probe_error(struct drm_i915_private *i915, int err,
+			      const char *func, int line)
+{
+	return 0;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c
new file mode 100644
index 000000000000..b21da7b745a5
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/drm_modeset_helper.h>
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+#include "intel_fb_bo.h"
+
+void intel_fb_bo_framebuffer_fini(struct xe_bo *bo)
+{
+	if (bo->flags & XE_BO_CREATE_PINNED_BIT) {
+		/* Unpin our kernel fb first */
+		xe_bo_lock(bo, false);
+		xe_bo_unpin(bo);
+		xe_bo_unlock(bo);
+	}
+	xe_bo_put(bo);
+}
+
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+				 struct xe_bo *bo,
+				 struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_i915_private *i915 = to_i915(bo->ttm.base.dev);
+	int ret;
+
+	xe_bo_get(bo);
+
+	ret = ttm_bo_reserve(&bo->ttm, true, false, NULL);
+	if (ret)
+		return ret;
+
+	if (!(bo->flags & XE_BO_SCANOUT_BIT)) {
+		/*
+		 * XE_BO_SCANOUT_BIT should ideally be set at creation, or is
+		 * automatically set when creating FB. We cannot change caching
+		 * mode when the boect is VM_BINDed, so we can only set
+		 * coherency with display when unbound.
+		 */
+		if (XE_IOCTL_DBG(i915, !list_empty(&bo->ttm.base.gpuva.list))) {
+			ttm_bo_unreserve(&bo->ttm);
+			return -EINVAL;
+		}
+		bo->flags |= XE_BO_SCANOUT_BIT;
+	}
+	ttm_bo_unreserve(&bo->ttm);
+
+	return ret;
+}
+
+struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+					  struct drm_file *filp,
+					  const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_i915_gem_object *bo;
+	struct drm_gem_object *gem = drm_gem_object_lookup(filp, mode_cmd->handles[0]);
+
+	if (!gem)
+		return ERR_PTR(-ENOENT);
+
+	bo = gem_to_xe_bo(gem);
+	/* Require vram placement or dma-buf import */
+	if (IS_DGFX(i915) &&
+	    !xe_bo_can_migrate(gem_to_xe_bo(gem), XE_PL_VRAM0) &&
+	    bo->ttm.type != ttm_bo_type_sg) {
+		drm_gem_object_put(gem);
+		return ERR_PTR(-EREMOTE);
+	}
+
+	return bo;
+}
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.h b/drivers/gpu/drm/xe/display/intel_fb_bo.h
new file mode 100644
index 000000000000..5d365b925b7a
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __INTEL_FB_BO_H__
+#define __INTEL_FB_BO_H__
+
+struct drm_file;
+struct drm_mode_fb_cmd2;
+struct drm_i915_private;
+struct intel_framebuffer;
+struct xe_bo;
+
+void intel_fb_bo_framebuffer_fini(struct xe_bo *bo);
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+				 struct xe_bo *bo,
+				 struct drm_mode_fb_cmd2 *mode_cmd);
+
+struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+					  struct drm_file *filp,
+					  const struct drm_mode_fb_cmd2 *mode_cmd);
+
+#endif
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
new file mode 100644
index 000000000000..51ae3561fd0d
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_fbdev_fb.h"
+
+#include <drm/drm_fb_helper.h>
+
+#include "xe_gt.h"
+#include "xe_ttm_stolen_mgr.h"
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+			 struct drm_fb_helper_surface_size *sizes)
+{
+	struct drm_framebuffer *fb;
+	struct drm_device *dev = helper->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_mode_fb_cmd2 mode_cmd = {};
+	struct drm_i915_gem_object *obj;
+	int size;
+
+	/* we don't do packed 24bpp */
+	if (sizes->surface_bpp == 24)
+		sizes->surface_bpp = 32;
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+
+	mode_cmd.pitches[0] = ALIGN(mode_cmd.width *
+				    DIV_ROUND_UP(sizes->surface_bpp, 8), XE_PAGE_SIZE);
+	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
+							  sizes->surface_depth);
+
+	size = mode_cmd.pitches[0] * mode_cmd.height;
+	size = PAGE_ALIGN(size);
+	obj = ERR_PTR(-ENODEV);
+
+	if (!IS_DGFX(dev_priv)) {
+		obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv),
+					   NULL, size,
+					   ttm_bo_type_kernel, XE_BO_SCANOUT_BIT |
+					   XE_BO_CREATE_STOLEN_BIT |
+					   XE_BO_CREATE_PINNED_BIT);
+		if (!IS_ERR(obj))
+			drm_info(&dev_priv->drm, "Allocated fbdev into stolen\n");
+		else
+			drm_info(&dev_priv->drm, "Allocated fbdev into stolen failed: %li\n", PTR_ERR(obj));
+	}
+	if (IS_ERR(obj)) {
+		obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv), NULL, size,
+					  ttm_bo_type_kernel, XE_BO_SCANOUT_BIT |
+					  XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(dev_priv)) |
+					  XE_BO_CREATE_PINNED_BIT);
+	}
+
+	if (IS_ERR(obj)) {
+		drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj);
+		fb = ERR_PTR(-ENOMEM);
+		goto err;
+	}
+
+	fb = intel_framebuffer_create(obj, &mode_cmd);
+	if (IS_ERR(fb)) {
+		xe_bo_unpin_map_no_vm(obj);
+		goto err;
+	}
+
+	drm_gem_object_put(intel_bo_to_drm_bo(obj));
+	return fb;
+
+err:
+	return fb;
+}
+
+int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
+			      struct drm_i915_gem_object *obj, struct i915_vma *vma)
+{
+	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+
+	if (!(obj->flags & XE_BO_CREATE_SYSTEM_BIT)) {
+		if (obj->flags & XE_BO_CREATE_STOLEN_BIT)
+			info->fix.smem_start = xe_ttm_stolen_io_offset(obj, 0);
+		else
+			info->fix.smem_start =
+				pci_resource_start(pdev, 2) +
+				xe_bo_addr(obj, 0, XE_PAGE_SIZE);
+
+		info->fix.smem_len = obj->ttm.base.size;
+	} else {
+		/* XXX: Pure fiction, as the BO may not be physically accessible.. */
+		info->fix.smem_start = 0;
+		info->fix.smem_len = obj->ttm.base.size;
+	}
+	XE_WARN_ON(iosys_map_is_null(&obj->vmap));
+
+	info->screen_base = obj->vmap.vaddr_iomem;
+	info->screen_size = intel_bo_to_drm_bo(obj)->size;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.h b/drivers/gpu/drm/xe/display/intel_fbdev_fb.h
new file mode 100644
index 000000000000..ea186772e0bb
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef __INTEL_FBDEV_FB_H__
+#define __INTEL_FBDEV_FB_H__
+
+struct drm_fb_helper;
+struct drm_fb_helper_surface_size;
+struct drm_i915_gem_object;
+struct drm_i915_private;
+struct fb_info;
+struct i915_vma;
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+			 struct drm_fb_helper_surface_size *sizes);
+int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info,
+			      struct drm_i915_gem_object *obj, struct i915_vma *vma);
+
+#endif
diff --git a/drivers/gpu/drm/xe/display/xe_display_misc.c b/drivers/gpu/drm/xe/display/xe_display_misc.c
new file mode 100644
index 000000000000..242c2ef4ca93
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_display_misc.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_display_types.h"
+
+struct pci_dev;
+
+unsigned int intel_gmch_vga_set_decode(struct pci_dev *pdev, bool enable_decode);
+
+unsigned int intel_gmch_vga_set_decode(struct pci_dev *pdev, bool enable_decode)
+{
+	/* ToDo: Implement the actual handling of vga decode */
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/display/xe_display_rps.c b/drivers/gpu/drm/xe/display/xe_display_rps.c
new file mode 100644
index 000000000000..ab21c581c192
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_display_rps.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_display_rps.h"
+
+void intel_display_rps_boost_after_vblank(struct drm_crtc *crtc,
+					  struct dma_fence *fence)
+{
+}
+
+void intel_display_rps_mark_interactive(struct drm_i915_private *i915,
+					struct intel_atomic_state *state,
+					bool interactive)
+{
+}
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
new file mode 100644
index 000000000000..27c2fb1c002a
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023, Intel Corporation.
+ */
+
+#include "i915_drv.h"
+#include "i915_vma.h"
+#include "intel_display_types.h"
+#include "intel_dsb_buffer.h"
+#include "xe_bo.h"
+#include "xe_gt.h"
+
+u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
+{
+	return xe_bo_ggtt_addr(dsb_buf->vma->bo);
+}
+
+void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
+{
+	iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
+}
+
+u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
+{
+	return iosys_map_rd(&dsb_buf->vma->bo->vmap, idx * 4, u32);
+}
+
+void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
+{
+	WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
+
+	iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
+}
+
+bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
+{
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+
+	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	if (!vma)
+		return false;
+
+	obj = xe_bo_create_pin_map(i915, xe_device_get_root_tile(i915),
+				   NULL, PAGE_ALIGN(size),
+				   ttm_bo_type_kernel,
+				   XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(i915)) |
+				   XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(obj)) {
+		kfree(vma);
+		return false;
+	}
+
+	vma->bo = obj;
+	dsb_buf->vma = vma;
+	dsb_buf->buf_size = size;
+
+	return true;
+}
+
+void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
+{
+	xe_bo_unpin_map_no_vm(dsb_buf->vma->bo);
+	kfree(dsb_buf->vma);
+}
+
+void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
+{
+	/* TODO: add xe specific flush_map() for dsb buffer object. */
+}
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
new file mode 100644
index 000000000000..722c84a56607
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+#include "intel_dpt.h"
+#include "intel_fb.h"
+#include "intel_fb_pin.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+
+#include <drm/ttm/ttm_bo.h>
+
+static void
+write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ofs,
+		  u32 width, u32 height, u32 src_stride, u32 dst_stride)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+	u32 column, row;
+
+	/* TODO: Maybe rewrite so we can traverse the bo addresses sequentially,
+	 * by writing dpt/ggtt in a different order?
+	 */
+
+	for (column = 0; column < width; column++) {
+		u32 src_idx = src_stride * (height - 1) + column + bo_ofs;
+
+		for (row = 0; row < height; row++) {
+			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
+							      xe->pat.idx[XE_CACHE_WB]);
+
+			iosys_map_wr(map, *dpt_ofs, u64, pte);
+			*dpt_ofs += 8;
+			src_idx -= src_stride;
+		}
+
+		/* The DE ignores the PTEs for the padding tiles */
+		*dpt_ofs += (dst_stride - height) * 8;
+	}
+
+	/* Align to next page */
+	*dpt_ofs = ALIGN(*dpt_ofs, 4096);
+}
+
+static void
+write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs,
+		   u32 bo_ofs, u32 width, u32 height, u32 src_stride,
+		   u32 dst_stride)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index)
+		= ggtt->pt_ops->pte_encode_bo;
+	u32 column, row;
+
+	for (row = 0; row < height; row++) {
+		u32 src_idx = src_stride * row + bo_ofs;
+
+		for (column = 0; column < width; column++) {
+			iosys_map_wr(map, *dpt_ofs, u64,
+				     pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
+				     xe->pat.idx[XE_CACHE_WB]));
+
+			*dpt_ofs += 8;
+			src_idx++;
+		}
+
+		/* The DE ignores the PTEs for the padding tiles */
+		*dpt_ofs += (dst_stride - width) * 8;
+	}
+
+	/* Align to next page */
+	*dpt_ofs = ALIGN(*dpt_ofs, 4096);
+}
+
+static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
+			       const struct i915_gtt_view *view,
+			       struct i915_vma *vma)
+{
+	struct xe_device *xe = to_xe_device(fb->base.dev);
+	struct xe_tile *tile0 = xe_device_get_root_tile(xe);
+	struct xe_ggtt *ggtt = tile0->mem.ggtt;
+	struct xe_bo *bo = intel_fb_obj(&fb->base), *dpt;
+	u32 dpt_size, size = bo->ttm.base.size;
+
+	if (view->type == I915_GTT_VIEW_NORMAL)
+		dpt_size = ALIGN(size / XE_PAGE_SIZE * 8, XE_PAGE_SIZE);
+	else if (view->type == I915_GTT_VIEW_REMAPPED)
+		dpt_size = ALIGN(intel_remapped_info_size(&fb->remapped_view.gtt.remapped) * 8,
+				 XE_PAGE_SIZE);
+	else
+		/* display uses 4K tiles instead of bytes here, convert to entries.. */
+		dpt_size = ALIGN(intel_rotation_info_size(&view->rotated) * 8,
+				 XE_PAGE_SIZE);
+
+	if (IS_DGFX(xe))
+		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
+					   ttm_bo_type_kernel,
+					   XE_BO_CREATE_VRAM0_BIT |
+					   XE_BO_CREATE_GGTT_BIT);
+	else
+		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
+					   ttm_bo_type_kernel,
+					   XE_BO_CREATE_STOLEN_BIT |
+					   XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(dpt))
+		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
+					   ttm_bo_type_kernel,
+					   XE_BO_CREATE_SYSTEM_BIT |
+					   XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(dpt))
+		return PTR_ERR(dpt);
+
+	if (view->type == I915_GTT_VIEW_NORMAL) {
+		u32 x;
+
+		for (x = 0; x < size / XE_PAGE_SIZE; x++) {
+			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE,
+							      xe->pat.idx[XE_CACHE_WB]);
+
+			iosys_map_wr(&dpt->vmap, x * 8, u64, pte);
+		}
+	} else if (view->type == I915_GTT_VIEW_REMAPPED) {
+		const struct intel_remapped_info *remap_info = &view->remapped;
+		u32 i, dpt_ofs = 0;
+
+		for (i = 0; i < ARRAY_SIZE(remap_info->plane); i++)
+			write_dpt_remapped(bo, &dpt->vmap, &dpt_ofs,
+					   remap_info->plane[i].offset,
+					   remap_info->plane[i].width,
+					   remap_info->plane[i].height,
+					   remap_info->plane[i].src_stride,
+					   remap_info->plane[i].dst_stride);
+
+	} else {
+		const struct intel_rotation_info *rot_info = &view->rotated;
+		u32 i, dpt_ofs = 0;
+
+		for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++)
+			write_dpt_rotated(bo, &dpt->vmap, &dpt_ofs,
+					  rot_info->plane[i].offset,
+					  rot_info->plane[i].width,
+					  rot_info->plane[i].height,
+					  rot_info->plane[i].src_stride,
+					  rot_info->plane[i].dst_stride);
+	}
+
+	vma->dpt = dpt;
+	vma->node = dpt->ggtt_node;
+	return 0;
+}
+
+static void
+write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo_ofs,
+		   u32 width, u32 height, u32 src_stride, u32 dst_stride)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	u32 column, row;
+
+	for (column = 0; column < width; column++) {
+		u32 src_idx = src_stride * (height - 1) + column + bo_ofs;
+
+		for (row = 0; row < height; row++) {
+			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
+							      xe->pat.idx[XE_CACHE_WB]);
+
+			xe_ggtt_set_pte(ggtt, *ggtt_ofs, pte);
+			*ggtt_ofs += XE_PAGE_SIZE;
+			src_idx -= src_stride;
+		}
+
+		/* The DE ignores the PTEs for the padding tiles */
+		*ggtt_ofs += (dst_stride - height) * XE_PAGE_SIZE;
+	}
+}
+
+static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
+				const struct i915_gtt_view *view,
+				struct i915_vma *vma)
+{
+	struct xe_bo *bo = intel_fb_obj(&fb->base);
+	struct xe_device *xe = to_xe_device(fb->base.dev);
+	struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+	u32 align;
+	int ret;
+
+	/* TODO: Consider sharing framebuffer mapping?
+	 * embed i915_vma inside intel_framebuffer
+	 */
+	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	ret = mutex_lock_interruptible(&ggtt->lock);
+	if (ret)
+		goto out;
+
+	align = XE_PAGE_SIZE;
+	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
+		align = max_t(u32, align, SZ_64K);
+
+	if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) {
+		vma->node = bo->ggtt_node;
+	} else if (view->type == I915_GTT_VIEW_NORMAL) {
+		u32 x, size = bo->ttm.base.size;
+
+		ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size,
+							 align, 0);
+		if (ret)
+			goto out_unlock;
+
+		for (x = 0; x < size; x += XE_PAGE_SIZE) {
+			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x,
+							      xe->pat.idx[XE_CACHE_WB]);
+
+			xe_ggtt_set_pte(ggtt, vma->node.start + x, pte);
+		}
+	} else {
+		u32 i, ggtt_ofs;
+		const struct intel_rotation_info *rot_info = &view->rotated;
+
+		/* display seems to use tiles instead of bytes here, so convert it back.. */
+		u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE;
+
+		ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size,
+							 align, 0);
+		if (ret)
+			goto out_unlock;
+
+		ggtt_ofs = vma->node.start;
+
+		for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++)
+			write_ggtt_rotated(bo, ggtt, &ggtt_ofs,
+					   rot_info->plane[i].offset,
+					   rot_info->plane[i].width,
+					   rot_info->plane[i].height,
+					   rot_info->plane[i].src_stride,
+					   rot_info->plane[i].dst_stride);
+	}
+
+	xe_ggtt_invalidate(ggtt);
+out_unlock:
+	mutex_unlock(&ggtt->lock);
+out:
+	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+	return ret;
+}
+
+static struct i915_vma *__xe_pin_fb_vma(struct intel_framebuffer *fb,
+					const struct i915_gtt_view *view)
+{
+	struct drm_device *dev = fb->base.dev;
+	struct xe_device *xe = to_xe_device(dev);
+	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	struct xe_bo *bo = intel_fb_obj(&fb->base);
+	int ret;
+
+	if (!vma)
+		return ERR_PTR(-ENODEV);
+
+	if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) &&
+	    intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 &&
+	    !(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) {
+		struct xe_tile *tile = xe_device_get_root_tile(xe);
+
+		/*
+		 * If we need to able to access the clear-color value stored in
+		 * the buffer, then we require that such buffers are also CPU
+		 * accessible.  This is important on small-bar systems where
+		 * only some subset of VRAM is CPU accessible.
+		 */
+		if (tile->mem.vram.io_size < tile->mem.vram.usable_size) {
+			ret = -EINVAL;
+			goto err;
+		}
+	}
+
+	/*
+	 * Pin the framebuffer, we can't use xe_bo_(un)pin functions as the
+	 * assumptions are incorrect for framebuffers
+	 */
+	ret = ttm_bo_reserve(&bo->ttm, false, false, NULL);
+	if (ret)
+		goto err;
+
+	if (IS_DGFX(xe))
+		ret = xe_bo_migrate(bo, XE_PL_VRAM0);
+	else
+		ret = xe_bo_validate(bo, NULL, true);
+	if (!ret)
+		ttm_bo_pin(&bo->ttm);
+	ttm_bo_unreserve(&bo->ttm);
+	if (ret)
+		goto err;
+
+	vma->bo = bo;
+	if (intel_fb_uses_dpt(&fb->base))
+		ret = __xe_pin_fb_vma_dpt(fb, view, vma);
+	else
+		ret = __xe_pin_fb_vma_ggtt(fb, view, vma);
+	if (ret)
+		goto err_unpin;
+
+	return vma;
+
+err_unpin:
+	ttm_bo_reserve(&bo->ttm, false, false, NULL);
+	ttm_bo_unpin(&bo->ttm);
+	ttm_bo_unreserve(&bo->ttm);
+err:
+	kfree(vma);
+	return ERR_PTR(ret);
+}
+
+static void __xe_unpin_fb_vma(struct i915_vma *vma)
+{
+	struct xe_device *xe = to_xe_device(vma->bo->ttm.base.dev);
+	struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+
+	if (vma->dpt)
+		xe_bo_unpin_map_no_vm(vma->dpt);
+	else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) ||
+		 vma->bo->ggtt_node.start != vma->node.start)
+		xe_ggtt_remove_node(ggtt, &vma->node);
+
+	ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
+	ttm_bo_unpin(&vma->bo->ttm);
+	ttm_bo_unreserve(&vma->bo->ttm);
+	kfree(vma);
+}
+
+struct i915_vma *
+intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
+			   bool phys_cursor,
+			   const struct i915_gtt_view *view,
+			   bool uses_fence,
+			   unsigned long *out_flags)
+{
+	*out_flags = 0;
+
+	return __xe_pin_fb_vma(to_intel_framebuffer(fb), view);
+}
+
+void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
+{
+	__xe_unpin_fb_vma(vma);
+}
+
+int intel_plane_pin_fb(struct intel_plane_state *plane_state)
+{
+	struct drm_framebuffer *fb = plane_state->hw.fb;
+	struct xe_bo *bo = intel_fb_obj(fb);
+	struct i915_vma *vma;
+
+	/* We reject creating !SCANOUT fb's, so this is weird.. */
+	drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_SCANOUT_BIT));
+
+	vma = __xe_pin_fb_vma(to_intel_framebuffer(fb), &plane_state->view.gtt);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	plane_state->ggtt_vma = vma;
+	return 0;
+}
+
+void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
+{
+	__xe_unpin_fb_vma(old_plane_state->ggtt_vma);
+	old_plane_state->ggtt_vma = NULL;
+}
+
+/*
+ * For Xe introduce dummy intel_dpt_create which just return NULL and
+ * intel_dpt_destroy which does nothing.
+ */
+struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb)
+{
+	return NULL;
+}
+
+void intel_dpt_destroy(struct i915_address_space *vm)
+{
+	return;
+}
+\ No newline at end of file
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
new file mode 100644
index 000000000000..0f11a39333e2
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023, Intel Corporation.
+ */
+
+#include "i915_drv.h"
+#include "intel_hdcp_gsc.h"
+
+bool intel_hdcp_gsc_cs_required(struct drm_i915_private *i915)
+{
+	return true;
+}
+
+bool intel_hdcp_gsc_check_status(struct drm_i915_private *i915)
+{
+	return false;
+}
+
+int intel_hdcp_gsc_init(struct drm_i915_private *i915)
+{
+	drm_info(&i915->drm, "HDCP support not yet implemented\n");
+	return -ENODEV;
+}
+
+void intel_hdcp_gsc_fini(struct drm_i915_private *i915)
+{
+}
+
+ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in,
+				size_t msg_in_len, u8 *msg_out,
+				size_t msg_out_len)
+{
+	return -ENODEV;
+}
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
new file mode 100644
index 000000000000..ccf83c12b545
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+/* for ioread64 */
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#include "xe_ggtt.h"
+
+#include "i915_drv.h"
+#include "intel_atomic_plane.h"
+#include "intel_display.h"
+#include "intel_display_types.h"
+#include "intel_fb.h"
+#include "intel_fb_pin.h"
+#include "intel_frontbuffer.h"
+#include "intel_plane_initial.h"
+
+static bool
+intel_reuse_initial_plane_obj(struct drm_i915_private *i915,
+			      const struct intel_initial_plane_config *plane_config,
+			      struct drm_framebuffer **fb)
+{
+	struct intel_crtc *crtc;
+
+	for_each_intel_crtc(&i915->drm, crtc) {
+		struct intel_crtc_state *crtc_state =
+			to_intel_crtc_state(crtc->base.state);
+		struct intel_plane *plane =
+			to_intel_plane(crtc->base.primary);
+		struct intel_plane_state *plane_state =
+			to_intel_plane_state(plane->base.state);
+
+		if (!crtc_state->uapi.active)
+			continue;
+
+		if (!plane_state->ggtt_vma)
+			continue;
+
+		if (intel_plane_ggtt_offset(plane_state) == plane_config->base) {
+			*fb = plane_state->hw.fb;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static struct xe_bo *
+initial_plane_bo(struct xe_device *xe,
+		 struct intel_initial_plane_config *plane_config)
+{
+	struct xe_tile *tile0 = xe_device_get_root_tile(xe);
+	struct xe_bo *bo;
+	resource_size_t phys_base;
+	u32 base, size, flags;
+	u64 page_size = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+
+	if (plane_config->size == 0)
+		return NULL;
+
+	flags = XE_BO_CREATE_PINNED_BIT | XE_BO_SCANOUT_BIT | XE_BO_CREATE_GGTT_BIT;
+
+	base = round_down(plane_config->base, page_size);
+	if (IS_DGFX(xe)) {
+		u64 __iomem *gte = tile0->mem.ggtt->gsm;
+		u64 pte;
+
+		gte += base / XE_PAGE_SIZE;
+
+		pte = ioread64(gte);
+		if (!(pte & XE_GGTT_PTE_DM)) {
+			drm_err(&xe->drm,
+				"Initial plane programming missing DM bit\n");
+			return NULL;
+		}
+
+		phys_base = pte & ~(page_size - 1);
+		flags |= XE_BO_CREATE_VRAM0_BIT;
+
+		/*
+		 * We don't currently expect this to ever be placed in the
+		 * stolen portion.
+		 */
+		if (phys_base >= tile0->mem.vram.usable_size) {
+			drm_err(&xe->drm,
+				"Initial plane programming using invalid range, phys_base=%pa\n",
+				&phys_base);
+			return NULL;
+		}
+
+		drm_dbg(&xe->drm,
+			"Using phys_base=%pa, based on initial plane programming\n",
+			&phys_base);
+	} else {
+		struct ttm_resource_manager *stolen = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
+
+		if (!stolen)
+			return NULL;
+		phys_base = base;
+		flags |= XE_BO_CREATE_STOLEN_BIT;
+
+		/*
+		 * If the FB is too big, just don't use it since fbdev is not very
+		 * important and we should probably use that space with FBC or other
+		 * features.
+		 */
+		if (IS_ENABLED(CONFIG_FRAMEBUFFER_CONSOLE) &&
+		    plane_config->size * 2 >> PAGE_SHIFT >= stolen->size)
+			return NULL;
+	}
+
+	size = round_up(plane_config->base + plane_config->size,
+			page_size);
+	size -= base;
+
+	bo = xe_bo_create_pin_map_at(xe, tile0, NULL, size, phys_base,
+				     ttm_bo_type_kernel, flags);
+	if (IS_ERR(bo)) {
+		drm_dbg(&xe->drm,
+			"Failed to create bo phys_base=%pa size %u with flags %x: %li\n",
+			&phys_base, size, flags, PTR_ERR(bo));
+		return NULL;
+	}
+
+	return bo;
+}
+
+static bool
+intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
+			      struct intel_initial_plane_config *plane_config)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_mode_fb_cmd2 mode_cmd = { 0 };
+	struct drm_framebuffer *fb = &plane_config->fb->base;
+	struct xe_bo *bo;
+
+	switch (fb->modifier) {
+	case DRM_FORMAT_MOD_LINEAR:
+	case I915_FORMAT_MOD_X_TILED:
+	case I915_FORMAT_MOD_Y_TILED:
+	case I915_FORMAT_MOD_4_TILED:
+		break;
+	default:
+		drm_dbg(&dev_priv->drm,
+			"Unsupported modifier for initial FB: 0x%llx\n",
+			fb->modifier);
+		return false;
+	}
+
+	mode_cmd.pixel_format = fb->format->format;
+	mode_cmd.width = fb->width;
+	mode_cmd.height = fb->height;
+	mode_cmd.pitches[0] = fb->pitches[0];
+	mode_cmd.modifier[0] = fb->modifier;
+	mode_cmd.flags = DRM_MODE_FB_MODIFIERS;
+
+	bo = initial_plane_bo(dev_priv, plane_config);
+	if (!bo)
+		return false;
+
+	if (intel_framebuffer_init(to_intel_framebuffer(fb),
+				   bo, &mode_cmd)) {
+		drm_dbg_kms(&dev_priv->drm, "intel fb init failed\n");
+		goto err_bo;
+	}
+	/* Reference handed over to fb */
+	xe_bo_put(bo);
+
+	return true;
+
+err_bo:
+	xe_bo_unpin_map_no_vm(bo);
+	return false;
+}
+
+static void
+intel_find_initial_plane_obj(struct intel_crtc *crtc,
+			     struct intel_initial_plane_config *plane_config)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_plane *plane =
+		to_intel_plane(crtc->base.primary);
+	struct intel_plane_state *plane_state =
+		to_intel_plane_state(plane->base.state);
+	struct intel_crtc_state *crtc_state =
+		to_intel_crtc_state(crtc->base.state);
+	struct drm_framebuffer *fb;
+	struct i915_vma *vma;
+
+	/*
+	 * TODO:
+	 *   Disable planes if get_initial_plane_config() failed.
+	 *   Make sure things work if the surface base is not page aligned.
+	 */
+	if (!plane_config->fb)
+		return;
+
+	if (intel_alloc_initial_plane_obj(crtc, plane_config))
+		fb = &plane_config->fb->base;
+	else if (!intel_reuse_initial_plane_obj(dev_priv, plane_config, &fb))
+		goto nofb;
+
+	plane_state->uapi.rotation = plane_config->rotation;
+	intel_fb_fill_view(to_intel_framebuffer(fb),
+			   plane_state->uapi.rotation, &plane_state->view);
+
+	vma = intel_pin_and_fence_fb_obj(fb, false, &plane_state->view.gtt,
+					 false, &plane_state->flags);
+	if (IS_ERR(vma))
+		goto nofb;
+
+	plane_state->ggtt_vma = vma;
+	plane_state->uapi.src_x = 0;
+	plane_state->uapi.src_y = 0;
+	plane_state->uapi.src_w = fb->width << 16;
+	plane_state->uapi.src_h = fb->height << 16;
+
+	plane_state->uapi.crtc_x = 0;
+	plane_state->uapi.crtc_y = 0;
+	plane_state->uapi.crtc_w = fb->width;
+	plane_state->uapi.crtc_h = fb->height;
+
+	plane_state->uapi.fb = fb;
+	drm_framebuffer_get(fb);
+
+	plane_state->uapi.crtc = &crtc->base;
+	intel_plane_copy_uapi_to_hw_state(plane_state, plane_state, crtc);
+
+	atomic_or(plane->frontbuffer_bit, &to_intel_frontbuffer(fb)->bits);
+
+	plane_config->vma = vma;
+
+	/*
+	 * Flip to the newly created mapping ASAP, so we can re-use the
+	 * first part of GGTT for WOPCM, prevent flickering, and prevent
+	 * the lookup of sysmem scratch pages.
+	 */
+	plane->check_plane(crtc_state, plane_state);
+	plane->async_flip(plane, crtc_state, plane_state, true);
+	return;
+
+nofb:
+	/*
+	 * We've failed to reconstruct the BIOS FB.  Current display state
+	 * indicates that the primary plane is visible, but has a NULL FB,
+	 * which will lead to problems later if we don't fix it up.  The
+	 * simplest solution is to just disable the primary plane now and
+	 * pretend the BIOS never had it enabled.
+	 */
+	intel_plane_disable_noatomic(crtc, plane);
+}
+
+static void plane_config_fini(struct intel_initial_plane_config *plane_config)
+{
+	if (plane_config->fb) {
+		struct drm_framebuffer *fb = &plane_config->fb->base;
+
+		/* We may only have the stub and not a full framebuffer */
+		if (drm_framebuffer_read_refcount(fb))
+			drm_framebuffer_put(fb);
+		else
+			kfree(fb);
+	}
+}
+
+void intel_crtc_initial_plane_config(struct intel_crtc *crtc)
+{
+	struct xe_device *xe = to_xe_device(crtc->base.dev);
+	struct intel_initial_plane_config plane_config = {};
+
+	/*
+	 * Note that reserving the BIOS fb up front prevents us
+	 * from stuffing other stolen allocations like the ring
+	 * on top.  This prevents some ugliness at boot time, and
+	 * can even allow for smooth boot transitions if the BIOS
+	 * fb is large enough for the active pipe configuration.
+	 */
+	xe->display.funcs.display->get_initial_plane_config(crtc, &plane_config);
+
+	/*
+	 * If the fb is shared between multiple heads, we'll
+	 * just get the first one.
+	 */
+	intel_find_initial_plane_obj(crtc, &plane_config);
+
+	plane_config_fini(&plane_config);
+}
diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
new file mode 100644
index 000000000000..8e6dd061f2ae
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GFXPIPE_COMMANDS_H_
+#define _XE_GFXPIPE_COMMANDS_H_
+
+#include "instructions/xe_instr_defs.h"
+
+#define GFXPIPE_PIPELINE		REG_GENMASK(28, 27)
+#define   PIPELINE_COMMON		REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x0)
+#define   PIPELINE_SINGLE_DW		REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x1)
+#define   PIPELINE_COMPUTE		REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x2)
+#define   PIPELINE_3D			REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x3)
+
+#define GFXPIPE_OPCODE			REG_GENMASK(26, 24)
+#define GFXPIPE_SUBOPCODE		REG_GENMASK(23, 16)
+
+#define GFXPIPE_MATCH_MASK		(XE_INSTR_CMD_TYPE | \
+					 GFXPIPE_PIPELINE | \
+					 GFXPIPE_OPCODE | \
+					 GFXPIPE_SUBOPCODE)
+
+#define GFXPIPE_COMMON_CMD(opcode, subopcode) \
+	(XE_INSTR_GFXPIPE | PIPELINE_COMMON | \
+	 REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \
+	 REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode))
+
+#define GFXPIPE_SINGLE_DW_CMD(opcode, subopcode) \
+	(XE_INSTR_GFXPIPE | PIPELINE_SINGLE_DW | \
+	 REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \
+	 REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode))
+
+#define GFXPIPE_3D_CMD(opcode, subopcode) \
+	(XE_INSTR_GFXPIPE | PIPELINE_3D | \
+	 REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \
+	 REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode))
+
+#define GFXPIPE_COMPUTE_CMD(opcode, subopcode) \
+	(XE_INSTR_GFXPIPE | PIPELINE_COMPUTE | \
+	 REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \
+	 REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode))
+
+#define STATE_BASE_ADDRESS			GFXPIPE_COMMON_CMD(0x1, 0x1)
+#define STATE_SIP				GFXPIPE_COMMON_CMD(0x1, 0x2)
+#define GPGPU_CSR_BASE_ADDRESS			GFXPIPE_COMMON_CMD(0x1, 0x4)
+#define STATE_COMPUTE_MODE			GFXPIPE_COMMON_CMD(0x1, 0x5)
+#define CMD_3DSTATE_BTD				GFXPIPE_COMMON_CMD(0x1, 0x6)
+
+#define CMD_3DSTATE_VF_STATISTICS		GFXPIPE_SINGLE_DW_CMD(0x0, 0xB)
+
+#define PIPELINE_SELECT				GFXPIPE_SINGLE_DW_CMD(0x1, 0x4)
+
+#define CMD_3DSTATE_DRAWING_RECTANGLE_FAST	GFXPIPE_3D_CMD(0x0, 0x0)
+#define CMD_3DSTATE_CLEAR_PARAMS		GFXPIPE_3D_CMD(0x0, 0x4)
+#define CMD_3DSTATE_DEPTH_BUFFER		GFXPIPE_3D_CMD(0x0, 0x5)
+#define CMD_3DSTATE_STENCIL_BUFFER		GFXPIPE_3D_CMD(0x0, 0x6)
+#define CMD_3DSTATE_HIER_DEPTH_BUFFER		GFXPIPE_3D_CMD(0x0, 0x7)
+#define CMD_3DSTATE_VERTEX_BUFFERS		GFXPIPE_3D_CMD(0x0, 0x8)
+#define CMD_3DSTATE_VERTEX_ELEMENTS		GFXPIPE_3D_CMD(0x0, 0x9)
+#define CMD_3DSTATE_INDEX_BUFFER		GFXPIPE_3D_CMD(0x0, 0xA)
+#define CMD_3DSTATE_VF				GFXPIPE_3D_CMD(0x0, 0xC)
+#define CMD_3DSTATE_MULTISAMPLE			GFXPIPE_3D_CMD(0x0, 0xD)
+#define CMD_3DSTATE_CC_STATE_POINTERS		GFXPIPE_3D_CMD(0x0, 0xE)
+#define CMD_3DSTATE_SCISSOR_STATE_POINTERS	GFXPIPE_3D_CMD(0x0, 0xF)
+#define CMD_3DSTATE_VS				GFXPIPE_3D_CMD(0x0, 0x10)
+#define CMD_3DSTATE_GS				GFXPIPE_3D_CMD(0x0, 0x11)
+#define CMD_3DSTATE_CLIP			GFXPIPE_3D_CMD(0x0, 0x12)
+#define CMD_3DSTATE_SF				GFXPIPE_3D_CMD(0x0, 0x13)
+#define CMD_3DSTATE_WM				GFXPIPE_3D_CMD(0x0, 0x14)
+#define CMD_3DSTATE_CONSTANT_VS			GFXPIPE_3D_CMD(0x0, 0x15)
+#define CMD_3DSTATE_CONSTANT_GS			GFXPIPE_3D_CMD(0x0, 0x16)
+#define CMD_3DSTATE_SAMPLE_MASK			GFXPIPE_3D_CMD(0x0, 0x18)
+#define CMD_3DSTATE_CONSTANT_HS			GFXPIPE_3D_CMD(0x0, 0x19)
+#define CMD_3DSTATE_CONSTANT_DS			GFXPIPE_3D_CMD(0x0, 0x1A)
+#define CMD_3DSTATE_HS				GFXPIPE_3D_CMD(0x0, 0x1B)
+#define CMD_3DSTATE_TE				GFXPIPE_3D_CMD(0x0, 0x1C)
+#define CMD_3DSTATE_DS				GFXPIPE_3D_CMD(0x0, 0x1D)
+#define CMD_3DSTATE_STREAMOUT			GFXPIPE_3D_CMD(0x0, 0x1E)
+#define CMD_3DSTATE_SBE				GFXPIPE_3D_CMD(0x0, 0x1F)
+#define CMD_3DSTATE_PS				GFXPIPE_3D_CMD(0x0, 0x20)
+#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP	GFXPIPE_3D_CMD(0x0, 0x21)
+#define CMD_3DSTATE_CPS_POINTERS		GFXPIPE_3D_CMD(0x0, 0x22)
+#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_CC	GFXPIPE_3D_CMD(0x0, 0x23)
+#define CMD_3DSTATE_BLEND_STATE_POINTERS	GFXPIPE_3D_CMD(0x0, 0x24)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_VS	GFXPIPE_3D_CMD(0x0, 0x26)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_HS	GFXPIPE_3D_CMD(0x0, 0x27)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_DS	GFXPIPE_3D_CMD(0x0, 0x28)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_GS	GFXPIPE_3D_CMD(0x0, 0x29)
+#define CMD_3DSTATE_BINDING_TABLE_POINTERS_PS	GFXPIPE_3D_CMD(0x0, 0x2A)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS	GFXPIPE_3D_CMD(0x0, 0x2B)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS	GFXPIPE_3D_CMD(0x0, 0x2C)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS	GFXPIPE_3D_CMD(0x0, 0x2D)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS	GFXPIPE_3D_CMD(0x0, 0x2E)
+#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_PS	GFXPIPE_3D_CMD(0x0, 0x2F)
+#define CMD_3DSTATE_VF_INSTANCING		GFXPIPE_3D_CMD(0x0, 0x49)
+#define CMD_3DSTATE_VF_SGVS			GFXPIPE_3D_CMD(0x0, 0x4A)
+#define CMD_3DSTATE_VF_TOPOLOGY			GFXPIPE_3D_CMD(0x0, 0x4B)
+#define CMD_3DSTATE_WM_CHROMAKEY		GFXPIPE_3D_CMD(0x0, 0x4C)
+#define CMD_3DSTATE_PS_BLEND			GFXPIPE_3D_CMD(0x0, 0x4D)
+#define CMD_3DSTATE_WM_DEPTH_STENCIL		GFXPIPE_3D_CMD(0x0, 0x4E)
+#define CMD_3DSTATE_PS_EXTRA			GFXPIPE_3D_CMD(0x0, 0x4F)
+#define CMD_3DSTATE_RASTER			GFXPIPE_3D_CMD(0x0, 0x50)
+#define CMD_3DSTATE_SBE_SWIZ			GFXPIPE_3D_CMD(0x0, 0x51)
+#define CMD_3DSTATE_WM_HZ_OP			GFXPIPE_3D_CMD(0x0, 0x52)
+#define CMD_3DSTATE_VF_COMPONENT_PACKING	GFXPIPE_3D_CMD(0x0, 0x55)
+#define CMD_3DSTATE_VF_SGVS_2			GFXPIPE_3D_CMD(0x0, 0x56)
+#define CMD_3DSTATE_VFG				GFXPIPE_3D_CMD(0x0, 0x57)
+#define CMD_3DSTATE_URB_ALLOC_VS		GFXPIPE_3D_CMD(0x0, 0x58)
+#define CMD_3DSTATE_URB_ALLOC_HS		GFXPIPE_3D_CMD(0x0, 0x59)
+#define CMD_3DSTATE_URB_ALLOC_DS		GFXPIPE_3D_CMD(0x0, 0x5A)
+#define CMD_3DSTATE_URB_ALLOC_GS		GFXPIPE_3D_CMD(0x0, 0x5B)
+#define CMD_3DSTATE_SO_BUFFER_INDEX_0		GFXPIPE_3D_CMD(0x0, 0x60)
+#define CMD_3DSTATE_SO_BUFFER_INDEX_1		GFXPIPE_3D_CMD(0x0, 0x61)
+#define CMD_3DSTATE_SO_BUFFER_INDEX_2		GFXPIPE_3D_CMD(0x0, 0x62)
+#define CMD_3DSTATE_SO_BUFFER_INDEX_3		GFXPIPE_3D_CMD(0x0, 0x63)
+#define CMD_3DSTATE_PRIMITIVE_REPLICATION	GFXPIPE_3D_CMD(0x0, 0x6C)
+#define CMD_3DSTATE_TBIMR_TILE_PASS_INFO	GFXPIPE_3D_CMD(0x0, 0x6E)
+#define CMD_3DSTATE_AMFS			GFXPIPE_3D_CMD(0x0, 0x6F)
+#define CMD_3DSTATE_DEPTH_BOUNDS		GFXPIPE_3D_CMD(0x0, 0x71)
+#define CMD_3DSTATE_AMFS_TEXTURE_POINTERS	GFXPIPE_3D_CMD(0x0, 0x72)
+#define CMD_3DSTATE_CONSTANT_TS_POINTER		GFXPIPE_3D_CMD(0x0, 0x73)
+#define CMD_3DSTATE_MESH_CONTROL		GFXPIPE_3D_CMD(0x0, 0x77)
+#define CMD_3DSTATE_MESH_DISTRIB		GFXPIPE_3D_CMD(0x0, 0x78)
+#define CMD_3DSTATE_TASK_REDISTRIB		GFXPIPE_3D_CMD(0x0, 0x79)
+#define CMD_3DSTATE_MESH_SHADER			GFXPIPE_3D_CMD(0x0, 0x7A)
+#define CMD_3DSTATE_MESH_SHADER_DATA		GFXPIPE_3D_CMD(0x0, 0x7B)
+#define CMD_3DSTATE_TASK_CONTROL		GFXPIPE_3D_CMD(0x0, 0x7C)
+#define CMD_3DSTATE_TASK_SHADER			GFXPIPE_3D_CMD(0x0, 0x7D)
+#define CMD_3DSTATE_TASK_SHADER_DATA		GFXPIPE_3D_CMD(0x0, 0x7E)
+#define CMD_3DSTATE_URB_ALLOC_MESH		GFXPIPE_3D_CMD(0x0, 0x7F)
+#define CMD_3DSTATE_URB_ALLOC_TASK		GFXPIPE_3D_CMD(0x0, 0x80)
+#define CMD_3DSTATE_CLIP_MESH			GFXPIPE_3D_CMD(0x0, 0x81)
+#define CMD_3DSTATE_SBE_MESH			GFXPIPE_3D_CMD(0x0, 0x82)
+#define CMD_3DSTATE_CPSIZE_CONTROL_BUFFER	GFXPIPE_3D_CMD(0x0, 0x83)
+
+#define CMD_3DSTATE_DRAWING_RECTANGLE		GFXPIPE_3D_CMD(0x1, 0x0)
+#define CMD_3DSTATE_CHROMA_KEY			GFXPIPE_3D_CMD(0x1, 0x4)
+#define CMD_3DSTATE_POLY_STIPPLE_OFFSET		GFXPIPE_3D_CMD(0x1, 0x6)
+#define CMD_3DSTATE_POLY_STIPPLE_PATTERN	GFXPIPE_3D_CMD(0x1, 0x7)
+#define CMD_3DSTATE_LINE_STIPPLE		GFXPIPE_3D_CMD(0x1, 0x8)
+#define CMD_3DSTATE_AA_LINE_PARAMETERS		GFXPIPE_3D_CMD(0x1, 0xA)
+#define CMD_3DSTATE_MONOFILTER_SIZE		GFXPIPE_3D_CMD(0x1, 0x11)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_VS	GFXPIPE_3D_CMD(0x1, 0x12)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_HS	GFXPIPE_3D_CMD(0x1, 0x13)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_DS	GFXPIPE_3D_CMD(0x1, 0x14)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_GS	GFXPIPE_3D_CMD(0x1, 0x15)
+#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_PS	GFXPIPE_3D_CMD(0x1, 0x16)
+#define CMD_3DSTATE_SO_DECL_LIST		GFXPIPE_3D_CMD(0x1, 0x17)
+#define   CMD_3DSTATE_SO_DECL_LIST_DW_LEN	REG_GENMASK(8, 0)
+#define CMD_3DSTATE_SO_BUFFER			GFXPIPE_3D_CMD(0x1, 0x18)
+#define CMD_3DSTATE_BINDING_TABLE_POOL_ALLOC	GFXPIPE_3D_CMD(0x1, 0x19)
+#define CMD_3DSTATE_SAMPLE_PATTERN		GFXPIPE_3D_CMD(0x1, 0x1C)
+#define CMD_3DSTATE_3D_MODE			GFXPIPE_3D_CMD(0x1, 0x1E)
+#define CMD_3DSTATE_SUBSLICE_HASH_TABLE		GFXPIPE_3D_CMD(0x1, 0x1F)
+#define CMD_3DSTATE_SLICE_TABLE_STATE_POINTERS	GFXPIPE_3D_CMD(0x1, 0x20)
+#define CMD_3DSTATE_PTBR_TILE_PASS_INFO		GFXPIPE_3D_CMD(0x1, 0x22)
+
+#endif
diff --git a/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h
new file mode 100644
index 000000000000..f8949cad9d0f
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_COMMANDS_H_
+#define _XE_GSC_COMMANDS_H_
+
+#include "instructions/xe_instr_defs.h"
+
+/*
+ * All GSCCS-specific commands have fixed length, so we can include it in the
+ * defines. Note that the generic GSC command header structure includes an
+ * optional data field in bits 9-21, but there are no commands that actually use
+ * it; some of the commands are instead defined as having an extended length
+ * field spanning bits 0-15, even if the extra bits are not required because the
+ * longest GSCCS command is only 8 dwords. To handle this, the defines below use
+ * a single field for both data and len. If we ever get a commands that does
+ * actually have data and this approach doesn't work for it we can re-work it
+ * at that point.
+ */
+
+#define GSC_OPCODE		REG_GENMASK(28, 22)
+#define GSC_CMD_DATA_AND_LEN	REG_GENMASK(21, 0)
+
+#define __GSC_INSTR(op, dl) \
+	(XE_INSTR_GSC | \
+	REG_FIELD_PREP(GSC_OPCODE, op) | \
+	REG_FIELD_PREP(GSC_CMD_DATA_AND_LEN, dl))
+
+#define GSC_HECI_CMD_PKT __GSC_INSTR(0, 6)
+
+#define GSC_FW_LOAD __GSC_INSTR(1, 2)
+#define   GSC_FW_LOAD_LIMIT_VALID REG_BIT(31)
+
+#endif
diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
new file mode 100644
index 000000000000..04179b2a48e1
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_INSTR_DEFS_H_
+#define _XE_INSTR_DEFS_H_
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * The first dword of any GPU instruction is the "instruction header."  Bits
+ * 31:29 identify the general type of the command and determine how exact
+ * opcodes and sub-opcodes will be encoded in the remaining bits.
+ */
+#define XE_INSTR_CMD_TYPE		GENMASK(31, 29)
+#define   XE_INSTR_MI			REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0)
+#define   XE_INSTR_GSC			REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2)
+#define   XE_INSTR_GFXPIPE		REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3)
+
+/*
+ * Most (but not all) instructions have a "length" field in the instruction
+ * header.  The value expected is the total number of dwords for the
+ * instruction, minus two.
+ *
+ * Some instructions have length fields longer or shorter than 8 bits, but
+ * those are rare.  This definition can be used for the common case where
+ * the length field is from 7:0.
+ */
+#define XE_INSTR_LEN_MASK		GENMASK(7, 0)
+#define XE_INSTR_NUM_DW(x)		REG_FIELD_PREP(XE_INSTR_LEN_MASK, (x) - 2)
+
+#endif
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
new file mode 100644
index 000000000000..1cfa96167fde
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MI_COMMANDS_H_
+#define _XE_MI_COMMANDS_H_
+
+#include "instructions/xe_instr_defs.h"
+
+/*
+ * MI (Memory Interface) commands are supported by all GT engines.  They
+ * provide general memory operations and command streamer control.  MI commands
+ * have a command type of 0x0 (MI_COMMAND) in bits 31:29 of the instruction
+ * header dword and a specific MI opcode in bits 28:23.
+ */
+
+#define MI_OPCODE			REG_GENMASK(28, 23)
+#define MI_SUBOPCODE			REG_GENMASK(22, 17)  /* used with MI_EXPANSION */
+
+#define __MI_INSTR(opcode) \
+	(XE_INSTR_MI | REG_FIELD_PREP(MI_OPCODE, opcode))
+
+#define MI_NOOP				__MI_INSTR(0x0)
+#define MI_USER_INTERRUPT		__MI_INSTR(0x2)
+#define MI_ARB_CHECK			__MI_INSTR(0x5)
+
+#define MI_ARB_ON_OFF			__MI_INSTR(0x8)
+#define   MI_ARB_ENABLE			REG_BIT(0)
+#define   MI_ARB_DISABLE		0x0
+
+#define MI_BATCH_BUFFER_END		__MI_INSTR(0xA)
+#define MI_TOPOLOGY_FILTER		__MI_INSTR(0xD)
+#define MI_FORCE_WAKEUP			__MI_INSTR(0x1D)
+
+#define MI_STORE_DATA_IMM		__MI_INSTR(0x20)
+#define   MI_SDI_GGTT			REG_BIT(22)
+#define   MI_SDI_LEN_DW			GENMASK(9, 0)
+#define   MI_SDI_NUM_DW(x)		REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2)
+#define   MI_SDI_NUM_QW(x)		(REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \
+					 REG_BIT(21))
+
+#define MI_LOAD_REGISTER_IMM		__MI_INSTR(0x22)
+#define   MI_LRI_LRM_CS_MMIO		REG_BIT(19)
+#define   MI_LRI_MMIO_REMAP_EN		REG_BIT(17)
+#define   MI_LRI_NUM_REGS(x)		XE_INSTR_NUM_DW(2 * (x) + 1)
+#define   MI_LRI_FORCE_POSTED		REG_BIT(12)
+
+#define MI_FLUSH_DW			__MI_INSTR(0x26)
+#define   MI_FLUSH_DW_STORE_INDEX	REG_BIT(21)
+#define   MI_INVALIDATE_TLB		REG_BIT(18)
+#define   MI_FLUSH_DW_CCS		REG_BIT(16)
+#define   MI_FLUSH_DW_OP_STOREDW	REG_BIT(14)
+#define   MI_FLUSH_DW_LEN_DW		REG_GENMASK(5, 0)
+#define   MI_FLUSH_IMM_DW		REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 4 - 2)
+#define   MI_FLUSH_IMM_QW		REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 5 - 2)
+#define   MI_FLUSH_DW_USE_GTT		REG_BIT(2)
+
+#define MI_BATCH_BUFFER_START		__MI_INSTR(0x31)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
new file mode 100644
index 000000000000..5592774fc690
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_ENGINE_REGS_H_
+#define _XE_ENGINE_REGS_H_
+
+#include <asm/page.h>
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * These *_BASE values represent the MMIO offset where each hardware engine's
+ * registers start.  The other definitions in this header are parameterized
+ * macros that will take one of these values as a parameter.
+ */
+#define RENDER_RING_BASE			0x02000
+#define BSD_RING_BASE				0x1c0000
+#define BSD2_RING_BASE				0x1c4000
+#define BSD3_RING_BASE				0x1d0000
+#define BSD4_RING_BASE				0x1d4000
+#define XEHP_BSD5_RING_BASE			0x1e0000
+#define XEHP_BSD6_RING_BASE			0x1e4000
+#define XEHP_BSD7_RING_BASE			0x1f0000
+#define XEHP_BSD8_RING_BASE			0x1f4000
+#define VEBOX_RING_BASE				0x1c8000
+#define VEBOX2_RING_BASE			0x1d8000
+#define XEHP_VEBOX3_RING_BASE			0x1e8000
+#define XEHP_VEBOX4_RING_BASE			0x1f8000
+#define COMPUTE0_RING_BASE			0x1a000
+#define COMPUTE1_RING_BASE			0x1c000
+#define COMPUTE2_RING_BASE			0x1e000
+#define COMPUTE3_RING_BASE			0x26000
+#define BLT_RING_BASE				0x22000
+#define XEHPC_BCS1_RING_BASE			0x3e0000
+#define XEHPC_BCS2_RING_BASE			0x3e2000
+#define XEHPC_BCS3_RING_BASE			0x3e4000
+#define XEHPC_BCS4_RING_BASE			0x3e6000
+#define XEHPC_BCS5_RING_BASE			0x3e8000
+#define XEHPC_BCS6_RING_BASE			0x3ea000
+#define XEHPC_BCS7_RING_BASE			0x3ec000
+#define XEHPC_BCS8_RING_BASE			0x3ee000
+#define GSCCS_RING_BASE				0x11a000
+
+#define RING_TAIL(base)				XE_REG((base) + 0x30)
+
+#define RING_HEAD(base)				XE_REG((base) + 0x34)
+#define   HEAD_ADDR				0x001FFFFC
+
+#define RING_START(base)			XE_REG((base) + 0x38)
+
+#define RING_CTL(base)				XE_REG((base) + 0x3c)
+#define   RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
+#define   RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
+
+#define RING_PSMI_CTL(base)			XE_REG((base) + 0x50, XE_REG_OPTION_MASKED)
+#define   RC_SEMA_IDLE_MSG_DISABLE		REG_BIT(12)
+#define   WAIT_FOR_EVENT_POWER_DOWN_DISABLE	REG_BIT(7)
+#define   IDLE_MSG_DISABLE			REG_BIT(0)
+
+#define RING_PWRCTX_MAXCNT(base)		XE_REG((base) + 0x54)
+#define   IDLE_WAIT_TIME			REG_GENMASK(19, 0)
+
+#define RING_ACTHD_UDW(base)			XE_REG((base) + 0x5c)
+#define RING_DMA_FADD_UDW(base)			XE_REG((base) + 0x60)
+#define RING_IPEHR(base)			XE_REG((base) + 0x68)
+#define RING_ACTHD(base)			XE_REG((base) + 0x74)
+#define RING_DMA_FADD(base)			XE_REG((base) + 0x78)
+#define RING_HWS_PGA(base)			XE_REG((base) + 0x80)
+#define RING_HWSTAM(base)			XE_REG((base) + 0x98)
+#define RING_MI_MODE(base)			XE_REG((base) + 0x9c)
+#define RING_NOPID(base)			XE_REG((base) + 0x94)
+
+#define FF_THREAD_MODE(base)			XE_REG((base) + 0xa0)
+#define   FF_TESSELATION_DOP_GATE_DISABLE	BIT(19)
+
+#define RING_IMR(base)				XE_REG((base) + 0xa8)
+
+#define RING_EIR(base)				XE_REG((base) + 0xb0)
+#define RING_EMR(base)				XE_REG((base) + 0xb4)
+#define RING_ESR(base)				XE_REG((base) + 0xb8)
+
+#define RING_CMD_CCTL(base)			XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
+/*
+ * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
+ * The lsb of each can be considered a separate enabling bit for encryption.
+ * 6:0 == default MOCS value for reads  =>  6:1 == table index for reads.
+ * 13:7 == default MOCS value for writes => 13:8 == table index for writes.
+ * 15:14 == Reserved => 31:30 are set to 0.
+ */
+#define   CMD_CCTL_WRITE_OVERRIDE_MASK		REG_GENMASK(13, 8)
+#define   CMD_CCTL_READ_OVERRIDE_MASK		REG_GENMASK(6, 1)
+
+#define CSFE_CHICKEN1(base)			XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED)
+#define   GHWSP_CSB_REPORT_DIS			REG_BIT(15)
+#define   PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS	REG_BIT(14)
+
+#define FF_SLICE_CS_CHICKEN1(base)		XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED)
+#define   FFSC_PERCTX_PREEMPT_CTRL		REG_BIT(14)
+
+#define FF_SLICE_CS_CHICKEN2(base)		XE_REG((base) + 0xe4, XE_REG_OPTION_MASKED)
+#define   PERF_FIX_BALANCING_CFE_DISABLE	REG_BIT(15)
+
+#define CS_DEBUG_MODE1(base)			XE_REG((base) + 0xec, XE_REG_OPTION_MASKED)
+#define   FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
+#define   REPLAY_MODE_GRANULARITY		REG_BIT(0)
+
+#define RING_BBADDR(base)			XE_REG((base) + 0x140)
+#define RING_BBADDR_UDW(base)			XE_REG((base) + 0x168)
+
+#define BCS_SWCTRL(base)			XE_REG((base) + 0x200, XE_REG_OPTION_MASKED)
+#define   BCS_SWCTRL_DISABLE_256B		REG_BIT(2)
+
+/* Handling MOCS value in BLIT_CCTL like it was done CMD_CCTL */
+#define BLIT_CCTL(base)				XE_REG((base) + 0x204)
+#define   BLIT_CCTL_DST_MOCS_MASK		REG_GENMASK(14, 9)
+#define   BLIT_CCTL_SRC_MOCS_MASK		REG_GENMASK(6, 1)
+
+#define RING_EXECLIST_STATUS_LO(base)		XE_REG((base) + 0x234)
+#define RING_EXECLIST_STATUS_HI(base)		XE_REG((base) + 0x234 + 4)
+
+#define RING_CONTEXT_CONTROL(base)		XE_REG((base) + 0x244)
+#define	  CTX_CTRL_INHIBIT_SYN_CTX_SWITCH	REG_BIT(3)
+#define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	REG_BIT(0)
+
+#define RING_MODE(base)				XE_REG((base) + 0x29c)
+#define   GFX_DISABLE_LEGACY_MODE		REG_BIT(3)
+
+#define RING_TIMESTAMP(base)			XE_REG((base) + 0x358)
+
+#define RING_TIMESTAMP_UDW(base)		XE_REG((base) + 0x358 + 4)
+#define   RING_VALID_MASK			0x00000001
+#define   RING_VALID				0x00000001
+#define   STOP_RING				REG_BIT(8)
+#define   TAIL_ADDR				0x001FFFF8
+
+#define RING_CTX_TIMESTAMP(base)		XE_REG((base) + 0x3a8)
+
+#define RING_FORCE_TO_NONPRIV(base, i)		XE_REG(((base) + 0x4d0) + (i) * 4)
+#define   RING_FORCE_TO_NONPRIV_DENY		REG_BIT(30)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_MASK	REG_GENMASK(29, 28)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_RW	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 0)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_RD	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 1)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_WR	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 2)
+#define   RING_FORCE_TO_NONPRIV_ACCESS_INVALID	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 3)
+#define   RING_FORCE_TO_NONPRIV_ADDRESS_MASK	REG_GENMASK(25, 2)
+#define   RING_FORCE_TO_NONPRIV_RANGE_MASK	REG_GENMASK(1, 0)
+#define   RING_FORCE_TO_NONPRIV_RANGE_1		REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 0)
+#define   RING_FORCE_TO_NONPRIV_RANGE_4		REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 1)
+#define   RING_FORCE_TO_NONPRIV_RANGE_16	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 2)
+#define   RING_FORCE_TO_NONPRIV_RANGE_64	REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 3)
+#define   RING_FORCE_TO_NONPRIV_MASK_VALID	(RING_FORCE_TO_NONPRIV_RANGE_MASK | \
+						 RING_FORCE_TO_NONPRIV_ACCESS_MASK | \
+						 RING_FORCE_TO_NONPRIV_DENY)
+#define   RING_MAX_NONPRIV_SLOTS  12
+
+#define RING_EXECLIST_SQ_CONTENTS_LO(base)	XE_REG((base) + 0x510)
+#define RING_EXECLIST_SQ_CONTENTS_HI(base)	XE_REG((base) + 0x510 + 4)
+
+#define RING_EXECLIST_CONTROL(base)		XE_REG((base) + 0x550)
+#define	  EL_CTRL_LOAD				REG_BIT(0)
+
+#define CS_CHICKEN1(base)			XE_REG((base) + 0x580, XE_REG_OPTION_MASKED)
+#define   PREEMPT_GPGPU_LEVEL(hi, lo)		(((hi) << 2) | ((lo) << 1))
+#define   PREEMPT_GPGPU_MID_THREAD_LEVEL	PREEMPT_GPGPU_LEVEL(0, 0)
+#define   PREEMPT_GPGPU_THREAD_GROUP_LEVEL	PREEMPT_GPGPU_LEVEL(0, 1)
+#define   PREEMPT_GPGPU_COMMAND_LEVEL		PREEMPT_GPGPU_LEVEL(1, 0)
+#define   PREEMPT_GPGPU_LEVEL_MASK		PREEMPT_GPGPU_LEVEL(1, 1)
+#define   PREEMPT_3D_OBJECT_LEVEL		REG_BIT(0)
+
+#define VDBOX_CGCTL3F08(base)			XE_REG((base) + 0x3f08)
+#define   CG3DDISHRS_CLKGATE_DIS		REG_BIT(5)
+
+#define VDBOX_CGCTL3F10(base)			XE_REG((base) + 0x3f10)
+#define   IECPUNIT_CLKGATE_DIS			REG_BIT(22)
+
+#define VDBOX_CGCTL3F18(base)			XE_REG((base) + 0x3f18)
+#define   ALNUNIT_CLKGATE_DIS			REG_BIT(13)
+
+#define VDBOX_CGCTL3F1C(base)			XE_REG((base) + 0x3f1c)
+#define   MFXPIPE_CLKGATE_DIS			REG_BIT(3)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
new file mode 100644
index 000000000000..a255946b6f77
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GPU_COMMANDS_H_
+#define _XE_GPU_COMMANDS_H_
+
+#include "regs/xe_reg_defs.h"
+
+#define XY_CTRL_SURF_COPY_BLT		((2 << 29) | (0x48 << 22) | 3)
+#define   SRC_ACCESS_TYPE_SHIFT		21
+#define   DST_ACCESS_TYPE_SHIFT		20
+#define   CCS_SIZE_MASK			GENMASK(17, 8)
+#define   XE2_CCS_SIZE_MASK		GENMASK(18, 9)
+#define   XY_CTRL_SURF_MOCS_MASK	GENMASK(31, 26)
+#define   XE2_XY_CTRL_SURF_MOCS_INDEX_MASK	GENMASK(31, 28)
+#define   NUM_CCS_BYTES_PER_BLOCK	256
+#define   NUM_BYTES_PER_CCS_BYTE(_xe)	(GRAPHICS_VER(_xe) >= 20 ? 512 : 256)
+
+#define XY_FAST_COLOR_BLT_CMD		(2 << 29 | 0x44 << 22)
+#define   XY_FAST_COLOR_BLT_DEPTH_32	(2 << 19)
+#define   XY_FAST_COLOR_BLT_DW		16
+#define   XY_FAST_COLOR_BLT_MOCS_MASK	GENMASK(27, 22)
+#define   XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK	GENMASK(27, 24)
+#define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
+
+#define XY_FAST_COPY_BLT_CMD		(2 << 29 | 0x42 << 22)
+#define   XY_FAST_COPY_BLT_DEPTH_32	(3<<24)
+#define   XY_FAST_COPY_BLT_D1_SRC_TILE4	REG_BIT(31)
+#define   XY_FAST_COPY_BLT_D1_DST_TILE4	REG_BIT(30)
+#define   XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK	GENMASK(23, 20)
+
+#define	PVC_MEM_SET_CMD		(2 << 29 | 0x5b << 22)
+#define   PVC_MEM_SET_CMD_LEN_DW	7
+#define   PVC_MEM_SET_MATRIX		REG_BIT(17)
+#define   PVC_MEM_SET_DATA_FIELD	GENMASK(31, 24)
+/* Bspec lists field as [6:0], but index alone is from [6:1] */
+#define   PVC_MEM_SET_MOCS_INDEX_MASK	GENMASK(6, 1)
+#define   XE2_MEM_SET_MOCS_INDEX_MASK	GENMASK(6, 3)
+
+#define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+
+#define	  PIPE_CONTROL0_HDC_PIPELINE_FLUSH		BIT(9)	/* gen12 */
+
+#define   PIPE_CONTROL_COMMAND_CACHE_INVALIDATE		(1<<29)
+#define   PIPE_CONTROL_TILE_CACHE_FLUSH			(1<<28)
+#define   PIPE_CONTROL_AMFS_FLUSH			(1<<25)
+#define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24)
+#define   PIPE_CONTROL_LRI_POST_SYNC			BIT(23)
+#define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
+#define   PIPE_CONTROL_CS_STALL				(1<<20)
+#define   PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET		(1<<19)
+#define	  PIPE_CONTROL_TLB_INVALIDATE			BIT(18)
+#define   PIPE_CONTROL_PSD_SYNC				(1<<17)
+#define   PIPE_CONTROL_QW_WRITE				(1<<14)
+#define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
+#define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12)
+#define   PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE	(1<<11)
+#define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE		(1<<10)
+#define   PIPE_CONTROL_INDIRECT_STATE_DISABLE		(1<<9)
+#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7)
+#define   PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
+#define   PIPE_CONTROL_VF_CACHE_INVALIDATE		(1<<4)
+#define   PIPE_CONTROL_CONST_CACHE_INVALIDATE		(1<<3)
+#define   PIPE_CONTROL_STATE_CACHE_INVALIDATE		(1<<2)
+#define   PIPE_CONTROL_STALL_AT_SCOREBOARD		(1<<1)
+#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
new file mode 100644
index 000000000000..9886ec9cb08e
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_REGS_H_
+#define _XE_GSC_REGS_H_
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include "regs/xe_reg_defs.h"
+
+/* Definitions of GSC H/W registers, bits, etc */
+
+#define MTL_GSC_HECI1_BASE	0x00116000
+#define MTL_GSC_HECI2_BASE	0x00117000
+
+#define HECI_H_CSR(base)	XE_REG((base) + 0x4)
+#define   HECI_H_CSR_IE		REG_BIT(0)
+#define   HECI_H_CSR_IS		REG_BIT(1)
+#define   HECI_H_CSR_IG		REG_BIT(2)
+#define   HECI_H_CSR_RDY	REG_BIT(3)
+#define   HECI_H_CSR_RST	REG_BIT(4)
+
+/*
+ * The FWSTS register values are FW defined and can be different between
+ * HECI1 and HECI2
+ */
+#define HECI_FWSTS1(base)				XE_REG((base) + 0xc40)
+#define   HECI1_FWSTS1_CURRENT_STATE			REG_GENMASK(3, 0)
+#define   HECI1_FWSTS1_CURRENT_STATE_RESET		0
+#define   HECI1_FWSTS1_PROXY_STATE_NORMAL		5
+#define   HECI1_FWSTS1_INIT_COMPLETE			REG_BIT(9)
+#define HECI_FWSTS5(base)				XE_REG((base) + 0xc68)
+#define   HECI1_FWSTS5_HUC_AUTH_DONE			REG_BIT(19)
+
+#define HECI_H_GS1(base)	XE_REG((base) + 0xc4c)
+#define   HECI_H_GS1_ER_PREP	REG_BIT(0)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
new file mode 100644
index 000000000000..1dd361046b5d
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -0,0 +1,478 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_REGS_H_
+#define _XE_GT_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * The GSI register range [0x0 - 0x40000) is replicated at a higher offset
+ * for the media GT.  xe_mmio and xe_gt_mcr functions will automatically
+ * translate offsets by MEDIA_GT_GSI_OFFSET when operating on the media GT.
+ */
+#define MEDIA_GT_GSI_OFFSET				0x380000
+#define MEDIA_GT_GSI_LENGTH				0x40000
+
+/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */
+#define MTL_MIRROR_TARGET_WP1				XE_REG(0xc60)
+#define   MTL_CAGF_MASK					REG_GENMASK(8, 0)
+#define   MTL_CC_MASK					REG_GENMASK(12, 9)
+
+/* RPM unit config (Gen8+) */
+#define RPM_CONFIG0					XE_REG(0xd00)
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK		REG_GENMASK(5, 3)
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ		0
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	1
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ	2
+#define   RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ		3
+#define   RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK		REG_GENMASK(2, 1)
+
+#define FORCEWAKE_ACK_MEDIA_VDBOX(n)		XE_REG(0xd50 + (n) * 4)
+#define FORCEWAKE_ACK_MEDIA_VEBOX(n)		XE_REG(0xd70 + (n) * 4)
+#define FORCEWAKE_ACK_RENDER			XE_REG(0xd84)
+
+#define GMD_ID					XE_REG(0xd8c)
+#define   GMD_ID_ARCH_MASK			REG_GENMASK(31, 22)
+#define   GMD_ID_RELEASE_MASK			REG_GENMASK(21, 14)
+#define   GMD_ID_REVID				REG_GENMASK(5, 0)
+
+#define FORCEWAKE_ACK_GSC			XE_REG(0xdf8)
+#define FORCEWAKE_ACK_GT_MTL			XE_REG(0xdfc)
+
+#define MCFG_MCR_SELECTOR			XE_REG(0xfd0)
+#define MTL_MCR_SELECTOR			XE_REG(0xfd4)
+#define SF_MCR_SELECTOR				XE_REG(0xfd8)
+#define MCR_SELECTOR				XE_REG(0xfdc)
+#define GAM_MCR_SELECTOR			XE_REG(0xfe0)
+#define   MCR_MULTICAST				REG_BIT(31)
+#define   MCR_SLICE_MASK			REG_GENMASK(30, 27)
+#define   MCR_SLICE(slice)			REG_FIELD_PREP(MCR_SLICE_MASK, slice)
+#define   MCR_SUBSLICE_MASK			REG_GENMASK(26, 24)
+#define   MCR_SUBSLICE(subslice)		REG_FIELD_PREP(MCR_SUBSLICE_MASK, subslice)
+#define   MTL_MCR_GROUPID			REG_GENMASK(11, 8)
+#define   MTL_MCR_INSTANCEID			REG_GENMASK(3, 0)
+
+#define PS_INVOCATION_COUNT			XE_REG(0x2348)
+
+#define XELP_GLOBAL_MOCS(i)			XE_REG(0x4000 + (i) * 4)
+#define XEHP_GLOBAL_MOCS(i)			XE_REG_MCR(0x4000 + (i) * 4)
+#define CCS_AUX_INV				XE_REG(0x4208)
+
+#define VD0_AUX_INV				XE_REG(0x4218)
+#define VE0_AUX_INV				XE_REG(0x4238)
+
+#define VE1_AUX_INV				XE_REG(0x42b8)
+#define   AUX_INV				REG_BIT(0)
+
+#define XEHP_TILE_ADDR_RANGE(_idx)		XE_REG_MCR(0x4900 + (_idx) * 4)
+#define XEHP_FLAT_CCS_BASE_ADDR			XE_REG_MCR(0x4910)
+
+#define WM_CHICKEN3				XE_REG_MCR(0x5588, XE_REG_OPTION_MASKED)
+#define   HIZ_PLANE_COMPRESSION_DIS		REG_BIT(10)
+
+#define CHICKEN_RASTER_2			XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED)
+#define   TBIMR_FAST_CLIP			REG_BIT(5)
+
+#define FF_MODE					XE_REG_MCR(0x6210)
+#define   DIS_TE_AUTOSTRIP			REG_BIT(31)
+#define   DIS_MESH_PARTIAL_AUTOSTRIP		REG_BIT(16)
+#define   DIS_MESH_AUTOSTRIP			REG_BIT(15)
+
+#define VFLSKPD					XE_REG_MCR(0x62a8, XE_REG_OPTION_MASKED)
+#define   DIS_PARTIAL_AUTOSTRIP			REG_BIT(9)
+#define   DIS_AUTOSTRIP				REG_BIT(6)
+#define   DIS_OVER_FETCH_CACHE			REG_BIT(1)
+#define   DIS_MULT_MISS_RD_SQUASH		REG_BIT(0)
+
+#define FF_MODE2				XE_REG(0x6604)
+#define XEHP_FF_MODE2				XE_REG_MCR(0x6604)
+#define   FF_MODE2_GS_TIMER_MASK		REG_GENMASK(31, 24)
+#define   FF_MODE2_GS_TIMER_224			REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
+#define   FF_MODE2_TDS_TIMER_MASK		REG_GENMASK(23, 16)
+#define   FF_MODE2_TDS_TIMER_128		REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
+
+#define CACHE_MODE_1				XE_REG(0x7004, XE_REG_OPTION_MASKED)
+#define   MSAA_OPTIMIZATION_REDUC_DISABLE	REG_BIT(11)
+
+#define COMMON_SLICE_CHICKEN1			XE_REG(0x7010)
+
+#define HIZ_CHICKEN					XE_REG(0x7018, XE_REG_OPTION_MASKED)
+#define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE	REG_BIT(14)
+#define   HZ_DEPTH_TEST_LE_GE_OPT_DISABLE		REG_BIT(13)
+
+#define XEHP_PSS_MODE2				XE_REG_MCR(0x703c, XE_REG_OPTION_MASKED)
+#define   SCOREBOARD_STALL_FLUSH_CONTROL	REG_BIT(5)
+
+#define XEHP_PSS_CHICKEN			XE_REG_MCR(0x7044, XE_REG_OPTION_MASKED)
+#define   FLSH_IGNORES_PSD			REG_BIT(10)
+#define   FD_END_COLLECT			REG_BIT(5)
+
+#define COMMON_SLICE_CHICKEN4			XE_REG(0x7300, XE_REG_OPTION_MASKED)
+#define   DISABLE_TDC_LOAD_BALANCING_CALC	REG_BIT(6)
+
+#define COMMON_SLICE_CHICKEN3				XE_REG(0x7304, XE_REG_OPTION_MASKED)
+#define XEHP_COMMON_SLICE_CHICKEN3			XE_REG_MCR(0x7304, XE_REG_OPTION_MASKED)
+#define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN	REG_BIT(12)
+#define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE		REG_BIT(12)
+#define   BLEND_EMB_FIX_DISABLE_IN_RCC			REG_BIT(11)
+#define   DISABLE_CPS_AWARE_COLOR_PIPE			REG_BIT(9)
+
+#define XEHP_SLICE_COMMON_ECO_CHICKEN1		XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED)
+#define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE	REG_BIT(14)
+
+#define VF_PREEMPTION				XE_REG(0x83a4, XE_REG_OPTION_MASKED)
+#define   PREEMPTION_VERTEX_COUNT		REG_GENMASK(15, 0)
+
+#define VF_SCRATCHPAD				XE_REG(0x83a8, XE_REG_OPTION_MASKED)
+#define   XE2_VFG_TED_CREDIT_INTERFACE_DISABLE	REG_BIT(13)
+
+#define VFG_PREEMPTION_CHICKEN			XE_REG(0x83b4, XE_REG_OPTION_MASKED)
+#define   POLYGON_TRIFAN_LINELOOP_DISABLE	REG_BIT(4)
+
+#define SQCNT1					XE_REG_MCR(0x8718)
+#define XELPMP_SQCNT1				XE_REG(0x8718)
+#define   ENFORCE_RAR				REG_BIT(23)
+
+#define XEHP_SQCM				XE_REG_MCR(0x8724)
+#define   EN_32B_ACCESS				REG_BIT(30)
+
+#define XE2_FLAT_CCS_BASE_RANGE_LOWER		XE_REG_MCR(0x8800)
+#define   XE2_FLAT_CCS_ENABLE			REG_BIT(0)
+
+#define GSCPSMI_BASE				XE_REG(0x880c)
+
+/* Fuse readout registers for GT */
+#define XEHP_FUSE4				XE_REG(0x9114)
+#define   CCS_EN_MASK				REG_GENMASK(19, 16)
+#define   GT_L3_EXC_MASK			REG_GENMASK(6, 4)
+
+#define	MIRROR_FUSE3				XE_REG(0x9118)
+#define   XE2_NODE_ENABLE_MASK			REG_GENMASK(31, 16)
+#define   L3BANK_PAIR_COUNT			4
+#define   L3BANK_MASK				REG_GENMASK(3, 0)
+/* on Xe_HP the same fuses indicates mslices instead of L3 banks */
+#define   MAX_MSLICES				4
+#define   MEML3_EN_MASK				REG_GENMASK(3, 0)
+
+#define XELP_EU_ENABLE				XE_REG(0x9134)	/* "_DISABLE" on Xe_LP */
+#define   XELP_EU_MASK				REG_GENMASK(7, 0)
+#define XELP_GT_GEOMETRY_DSS_ENABLE		XE_REG(0x913c)
+
+#define GT_VEBOX_VDBOX_DISABLE			XE_REG(0x9140)
+#define   GT_VEBOX_DISABLE_MASK			REG_GENMASK(19, 16)
+#define   GT_VDBOX_DISABLE_MASK			REG_GENMASK(7, 0)
+
+#define XEHP_GT_COMPUTE_DSS_ENABLE		XE_REG(0x9144)
+#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		XE_REG(0x9148)
+#define XE2_GT_COMPUTE_DSS_2			XE_REG(0x914c)
+#define XE2_GT_GEOMETRY_DSS_1			XE_REG(0x9150)
+#define XE2_GT_GEOMETRY_DSS_2			XE_REG(0x9154)
+
+#define GDRST					XE_REG(0x941c)
+#define   GRDOM_GUC				REG_BIT(3)
+#define   GRDOM_FULL				REG_BIT(0)
+
+#define MISCCPCTL				XE_REG(0x9424)
+#define   DOP_CLOCK_GATE_RENDER_ENABLE		REG_BIT(1)
+
+#define UNSLCGCTL9430				XE_REG(0x9430)
+#define   MSQDUNIT_CLKGATE_DIS			REG_BIT(3)
+
+#define UNSLICE_UNIT_LEVEL_CLKGATE		XE_REG(0x9434)
+#define   VFUNIT_CLKGATE_DIS			REG_BIT(20)
+#define   TSGUNIT_CLKGATE_DIS			REG_BIT(17) /* XEHPSDV */
+#define   CG3DDISCFEG_CLKGATE_DIS		REG_BIT(17) /* DG2 */
+#define   GAMEDIA_CLKGATE_DIS			REG_BIT(11)
+#define   HSUNIT_CLKGATE_DIS			REG_BIT(8)
+#define   VSUNIT_CLKGATE_DIS			REG_BIT(3)
+
+#define UNSLCGCTL9440				XE_REG(0x9440)
+#define   GAMTLBOACS_CLKGATE_DIS		REG_BIT(28)
+#define   GAMTLBVDBOX5_CLKGATE_DIS		REG_BIT(27)
+#define   GAMTLBVDBOX6_CLKGATE_DIS		REG_BIT(26)
+#define   GAMTLBVDBOX3_CLKGATE_DIS		REG_BIT(24)
+#define   GAMTLBVDBOX4_CLKGATE_DIS		REG_BIT(23)
+#define   GAMTLBVDBOX7_CLKGATE_DIS		REG_BIT(22)
+#define   GAMTLBVDBOX2_CLKGATE_DIS		REG_BIT(21)
+#define   GAMTLBVDBOX0_CLKGATE_DIS		REG_BIT(17)
+#define   GAMTLBKCR_CLKGATE_DIS			REG_BIT(16)
+#define   GAMTLBGUC_CLKGATE_DIS			REG_BIT(15)
+#define   GAMTLBBLT_CLKGATE_DIS			REG_BIT(14)
+#define   GAMTLBVDBOX1_CLKGATE_DIS		REG_BIT(6)
+
+#define UNSLCGCTL9444				XE_REG(0x9444)
+#define   GAMTLBGFXA0_CLKGATE_DIS		REG_BIT(30)
+#define   GAMTLBGFXA1_CLKGATE_DIS		REG_BIT(29)
+#define   GAMTLBCOMPA0_CLKGATE_DIS		REG_BIT(28)
+#define   GAMTLBCOMPA1_CLKGATE_DIS		REG_BIT(27)
+#define   GAMTLBCOMPB0_CLKGATE_DIS		REG_BIT(26)
+#define   GAMTLBCOMPB1_CLKGATE_DIS		REG_BIT(25)
+#define   GAMTLBCOMPC0_CLKGATE_DIS		REG_BIT(24)
+#define   GAMTLBCOMPC1_CLKGATE_DIS		REG_BIT(23)
+#define   GAMTLBCOMPD0_CLKGATE_DIS		REG_BIT(22)
+#define   GAMTLBCOMPD1_CLKGATE_DIS		REG_BIT(21)
+#define   GAMTLBMERT_CLKGATE_DIS		REG_BIT(20)
+#define   GAMTLBVEBOX3_CLKGATE_DIS		REG_BIT(19)
+#define   GAMTLBVEBOX2_CLKGATE_DIS		REG_BIT(18)
+#define   GAMTLBVEBOX1_CLKGATE_DIS		REG_BIT(17)
+#define   GAMTLBVEBOX0_CLKGATE_DIS		REG_BIT(16)
+#define   LTCDD_CLKGATE_DIS			REG_BIT(10)
+
+#define XEHP_SLICE_UNIT_LEVEL_CLKGATE		XE_REG_MCR(0x94d4)
+#define   L3_CR2X_CLKGATE_DIS			REG_BIT(17)
+#define   L3_CLKGATE_DIS			REG_BIT(16)
+#define   NODEDSS_CLKGATE_DIS			REG_BIT(12)
+#define   MSCUNIT_CLKGATE_DIS			REG_BIT(10)
+#define   RCCUNIT_CLKGATE_DIS			REG_BIT(7)
+#define   SARBUNIT_CLKGATE_DIS			REG_BIT(5)
+#define   SBEUNIT_CLKGATE_DIS			REG_BIT(4)
+
+#define UNSLICE_UNIT_LEVEL_CLKGATE2		XE_REG(0x94e4)
+#define   VSUNIT_CLKGATE2_DIS			REG_BIT(19)
+
+#define SUBSLICE_UNIT_LEVEL_CLKGATE		XE_REG_MCR(0x9524)
+#define   DSS_ROUTER_CLKGATE_DIS		REG_BIT(28)
+#define   GWUNIT_CLKGATE_DIS			REG_BIT(16)
+
+#define SUBSLICE_UNIT_LEVEL_CLKGATE2		XE_REG_MCR(0x9528)
+#define   CPSSUNIT_CLKGATE_DIS			REG_BIT(9)
+
+#define SSMCGCTL9530				XE_REG_MCR(0x9530)
+#define   RTFUNIT_CLKGATE_DIS			REG_BIT(18)
+
+#define DFR_RATIO_EN_AND_CHICKEN		XE_REG_MCR(0x9550)
+#define   DFR_DISABLE				REG_BIT(9)
+
+#define RPNSWREQ				XE_REG(0xa008)
+#define   REQ_RATIO_MASK			REG_GENMASK(31, 23)
+
+#define RP_CONTROL				XE_REG(0xa024)
+#define   RPSWCTL_MASK				REG_GENMASK(10, 9)
+#define   RPSWCTL_ENABLE			REG_FIELD_PREP(RPSWCTL_MASK, 2)
+#define   RPSWCTL_DISABLE			REG_FIELD_PREP(RPSWCTL_MASK, 0)
+#define RC_CONTROL				XE_REG(0xa090)
+#define   RC_CTL_HW_ENABLE			REG_BIT(31)
+#define   RC_CTL_TO_MODE			REG_BIT(28)
+#define   RC_CTL_RC6_ENABLE			REG_BIT(18)
+#define RC_STATE				XE_REG(0xa094)
+#define RC_IDLE_HYSTERSIS			XE_REG(0xa0ac)
+
+#define PMINTRMSK				XE_REG(0xa168)
+#define   PMINTR_DISABLE_REDIRECT_TO_GUC	REG_BIT(31)
+#define   ARAT_EXPIRED_INTRMSK			REG_BIT(9)
+
+#define FORCEWAKE_GT				XE_REG(0xa188)
+
+#define PG_ENABLE				XE_REG(0xa210)
+
+#define CTC_MODE				XE_REG(0xa26c)
+#define   CTC_SHIFT_PARAMETER_MASK		REG_GENMASK(2, 1)
+#define   CTC_SOURCE_DIVIDE_LOGIC		REG_BIT(0)
+
+#define FORCEWAKE_RENDER			XE_REG(0xa278)
+#define FORCEWAKE_MEDIA_VDBOX(n)		XE_REG(0xa540 + (n) * 4)
+#define FORCEWAKE_MEDIA_VEBOX(n)		XE_REG(0xa560 + (n) * 4)
+#define FORCEWAKE_GSC				XE_REG(0xa618)
+
+#define XEHPC_LNCFMISCCFGREG0			XE_REG_MCR(0xb01c, XE_REG_OPTION_MASKED)
+#define   XEHPC_OVRLSCCC			REG_BIT(0)
+
+/* L3 Cache Control */
+#define XELP_LNCFCMOCS(i)			XE_REG(0xb020 + (i) * 4)
+#define XEHP_LNCFCMOCS(i)			XE_REG_MCR(0xb020 + (i) * 4)
+#define LNCFCMOCS_REG_COUNT			32
+
+#define XEHP_L3NODEARBCFG			XE_REG_MCR(0xb0b4)
+#define   XEHP_LNESPARE				REG_BIT(19)
+
+#define XEHP_L3SQCREG5				XE_REG_MCR(0xb158)
+#define   L3_PWM_TIMER_INIT_VAL_MASK		REG_GENMASK(9, 0)
+
+#define XEHP_L3SCQREG7				XE_REG_MCR(0xb188)
+#define   BLEND_FILL_CACHING_OPT_DIS		REG_BIT(3)
+
+#define XEHPC_L3CLOS_MASK(i)			XE_REG_MCR(0xb194 + (i) * 8)
+
+#define XE2LPM_L3SQCREG5			XE_REG_MCR(0xb658)
+
+#define XEHP_MERT_MOD_CTRL			XE_REG_MCR(0xcf28)
+#define RENDER_MOD_CTRL				XE_REG_MCR(0xcf2c)
+#define COMP_MOD_CTRL				XE_REG_MCR(0xcf30)
+#define XEHP_VDBX_MOD_CTRL			XE_REG_MCR(0xcf34)
+#define XELPMP_VDBX_MOD_CTRL			XE_REG(0xcf34)
+#define XEHP_VEBX_MOD_CTRL			XE_REG_MCR(0xcf38)
+#define XELPMP_VEBX_MOD_CTRL			XE_REG(0xcf38)
+#define   FORCE_MISS_FTLB			REG_BIT(3)
+
+#define XEHP_GAMSTLB_CTRL			XE_REG_MCR(0xcf4c)
+#define   CONTROL_BLOCK_CLKGATE_DIS		REG_BIT(12)
+#define   EGRESS_BLOCK_CLKGATE_DIS		REG_BIT(11)
+#define   TAG_BLOCK_CLKGATE_DIS			REG_BIT(7)
+
+#define XEHP_GAMCNTRL_CTRL			XE_REG_MCR(0xcf54)
+#define   INVALIDATION_BROADCAST_MODE_DIS	REG_BIT(12)
+#define   GLOBAL_INVALIDATION_MODE		REG_BIT(2)
+
+#define HALF_SLICE_CHICKEN5			XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED)
+#define   DISABLE_SAMPLE_G_PERFORMANCE		REG_BIT(0)
+
+#define SAMPLER_MODE				XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED)
+#define   ENABLE_SMALLPL			REG_BIT(15)
+#define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
+#define   SAMPLER_ENABLE_HEADLESS_MSG		REG_BIT(5)
+#define   INDIRECT_STATE_BASE_ADDR_OVERRIDE	REG_BIT(0)
+
+#define HALF_SLICE_CHICKEN7				XE_REG_MCR(0xe194, XE_REG_OPTION_MASKED)
+#define   DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA	REG_BIT(15)
+
+#define CACHE_MODE_SS				XE_REG_MCR(0xe420, XE_REG_OPTION_MASKED)
+#define   DISABLE_ECC				REG_BIT(5)
+#define   ENABLE_PREFETCH_INTO_IC		REG_BIT(3)
+
+#define ROW_CHICKEN4				XE_REG_MCR(0xe48c, XE_REG_OPTION_MASKED)
+#define   DISABLE_GRF_CLEAR			REG_BIT(13)
+#define   XEHP_DIS_BBL_SYSPIPE			REG_BIT(11)
+#define   DISABLE_TDL_PUSH			REG_BIT(9)
+#define   DIS_PICK_2ND_EU			REG_BIT(7)
+#define   DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX	REG_BIT(4)
+#define   THREAD_EX_ARB_MODE			REG_GENMASK(3, 2)
+#define   THREAD_EX_ARB_MODE_RR_AFTER_DEP	REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
+
+#define ROW_CHICKEN3				XE_REG_MCR(0xe49c, XE_REG_OPTION_MASKED)
+#define   DIS_FIX_EOT1_FLUSH			REG_BIT(9)
+
+#define ROW_CHICKEN				XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
+#define   UGM_BACKUP_MODE			REG_BIT(13)
+#define   MDQ_ARBITRATION_MODE			REG_BIT(12)
+#define   EARLY_EOT_DIS				REG_BIT(1)
+
+#define ROW_CHICKEN2				XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED)
+#define   DISABLE_READ_SUPPRESSION		REG_BIT(15)
+#define   DISABLE_EARLY_READ			REG_BIT(14)
+#define   ENABLE_LARGE_GRF_MODE			REG_BIT(12)
+#define   PUSH_CONST_DEREF_HOLD_DIS		REG_BIT(8)
+#define   DISABLE_DOP_GATING			REG_BIT(0)
+
+#define RT_CTRL					XE_REG_MCR(0xe530)
+#define   DIS_NULL_QUERY			REG_BIT(10)
+
+#define XEHP_HDC_CHICKEN0					XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED)
+#define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
+#define   DIS_ATOMIC_CHAINING_TYPED_WRITES	REG_BIT(3)
+
+#define LSC_CHICKEN_BIT_0			XE_REG_MCR(0xe7c8)
+#define   DISABLE_D8_D16_COASLESCE		REG_BIT(30)
+#define   TGM_WRITE_EOM_FORCE			REG_BIT(17)
+#define   FORCE_1_SUB_MESSAGE_PER_FRAGMENT	REG_BIT(15)
+#define   SEQUENTIAL_ACCESS_UPGRADE_DISABLE	REG_BIT(13)
+
+#define LSC_CHICKEN_BIT_0_UDW			XE_REG_MCR(0xe7c8 + 4)
+#define   UGM_FRAGMENT_THRESHOLD_TO_3		REG_BIT(58 - 32)
+#define   DIS_CHAIN_2XSIMD8			REG_BIT(55 - 32)
+#define   XE2_ALLOC_DPA_STARVE_FIX_DIS		REG_BIT(47 - 32)
+#define   ENABLE_SMP_LD_RENDER_SURFACE_CONTROL	REG_BIT(44 - 32)
+#define   FORCE_SLM_FENCE_SCOPE_TO_TILE		REG_BIT(42 - 32)
+#define   FORCE_UGM_FENCE_SCOPE_TO_TILE		REG_BIT(41 - 32)
+#define   MAXREQS_PER_BANK			REG_GENMASK(39 - 32, 37 - 32)
+#define   DISABLE_128B_EVICTION_COMMAND_UDW	REG_BIT(36 - 32)
+
+#define SARB_CHICKEN1				XE_REG_MCR(0xe90c)
+#define   COMP_CKN_IN				REG_GENMASK(30, 29)
+
+#define RCU_MODE				XE_REG(0x14800, XE_REG_OPTION_MASKED)
+#define   RCU_MODE_FIXED_SLICE_CCS_MODE		REG_BIT(1)
+#define   RCU_MODE_CCS_ENABLE			REG_BIT(0)
+
+/*
+ * Total of 4 cslices, where each cslice is in the form:
+ *   [0-3]     CCS ID
+ *   [4-6]     RSVD
+ *   [7]       Disabled
+ */
+#define CCS_MODE				XE_REG(0x14804)
+#define   CCS_MODE_CSLICE_0_3_MASK		REG_GENMASK(11, 0) /* 3 bits per cslice */
+#define   CCS_MODE_CSLICE_MASK			0x7 /* CCS0-3 + rsvd */
+#define   CCS_MODE_CSLICE_WIDTH			ilog2(CCS_MODE_CSLICE_MASK + 1)
+#define   CCS_MODE_CSLICE(cslice, ccs) \
+	((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH))
+
+#define FORCEWAKE_ACK_GT			XE_REG(0x130044)
+#define   FORCEWAKE_KERNEL			BIT(0)
+#define   FORCEWAKE_USER			BIT(1)
+#define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
+
+#define MTL_MEDIA_PERF_LIMIT_REASONS		XE_REG(0x138030)
+#define MTL_MEDIA_MC6				XE_REG(0x138048)
+
+#define GT_CORE_STATUS				XE_REG(0x138060)
+#define   RCN_MASK				REG_GENMASK(2, 0)
+#define   GT_C0					0
+#define   GT_C6					3
+
+#define GT_GFX_RC6_LOCKED			XE_REG(0x138104)
+#define GT_GFX_RC6				XE_REG(0x138108)
+
+#define GT0_PERF_LIMIT_REASONS			XE_REG(0x1381a8)
+#define   GT0_PERF_LIMIT_REASONS_MASK		0xde3
+#define   PROCHOT_MASK				REG_BIT(0)
+#define   THERMAL_LIMIT_MASK			REG_BIT(1)
+#define   RATL_MASK				REG_BIT(5)
+#define   VR_THERMALERT_MASK			REG_BIT(6)
+#define   VR_TDC_MASK				REG_BIT(7)
+#define   POWER_LIMIT_4_MASK			REG_BIT(8)
+#define   POWER_LIMIT_1_MASK			REG_BIT(10)
+#define   POWER_LIMIT_2_MASK			REG_BIT(11)
+
+#define GT_PERF_STATUS				XE_REG(0x1381b4)
+#define   VOLTAGE_MASK				REG_GENMASK(10, 0)
+
+#define GT_INTR_DW(x)				XE_REG(0x190018 + ((x) * 4))
+
+#define RENDER_COPY_INTR_ENABLE			XE_REG(0x190030)
+#define VCS_VECS_INTR_ENABLE			XE_REG(0x190034)
+#define GUC_SG_INTR_ENABLE			XE_REG(0x190038)
+#define   ENGINE1_MASK				REG_GENMASK(31, 16)
+#define   ENGINE0_MASK				REG_GENMASK(15, 0)
+#define GPM_WGBOXPERF_INTR_ENABLE		XE_REG(0x19003c)
+#define GUNIT_GSC_INTR_ENABLE			XE_REG(0x190044)
+#define CCS_RSVD_INTR_ENABLE			XE_REG(0x190048)
+
+#define INTR_IDENTITY_REG(x)			XE_REG(0x190060 + ((x) * 4))
+#define   INTR_DATA_VALID			REG_BIT(31)
+#define   INTR_ENGINE_INSTANCE(x)		REG_FIELD_GET(GENMASK(25, 20), x)
+#define   INTR_ENGINE_CLASS(x)			REG_FIELD_GET(GENMASK(18, 16), x)
+#define   INTR_ENGINE_INTR(x)			REG_FIELD_GET(GENMASK(15, 0), x)
+#define   OTHER_GUC_INSTANCE			0
+#define   OTHER_GSC_INSTANCE			6
+
+#define IIR_REG_SELECTOR(x)			XE_REG(0x190070 + ((x) * 4))
+#define RCS0_RSVD_INTR_MASK			XE_REG(0x190090)
+#define BCS_RSVD_INTR_MASK			XE_REG(0x1900a0)
+#define VCS0_VCS1_INTR_MASK			XE_REG(0x1900a8)
+#define VCS2_VCS3_INTR_MASK			XE_REG(0x1900ac)
+#define VECS0_VECS1_INTR_MASK			XE_REG(0x1900d0)
+#define GUC_SG_INTR_MASK			XE_REG(0x1900e8)
+#define GPM_WGBOXPERF_INTR_MASK			XE_REG(0x1900ec)
+#define GUNIT_GSC_INTR_MASK			XE_REG(0x1900f4)
+#define CCS0_CCS1_INTR_MASK			XE_REG(0x190100)
+#define CCS2_CCS3_INTR_MASK			XE_REG(0x190104)
+#define XEHPC_BCS1_BCS2_INTR_MASK		XE_REG(0x190110)
+#define XEHPC_BCS3_BCS4_INTR_MASK		XE_REG(0x190114)
+#define XEHPC_BCS5_BCS6_INTR_MASK		XE_REG(0x190118)
+#define XEHPC_BCS7_BCS8_INTR_MASK		XE_REG(0x19011c)
+#define   GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11)
+#define   GT_CONTEXT_SWITCH_INTERRUPT		REG_BIT(8)
+#define   GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	REG_BIT(4)
+#define   GT_CS_MASTER_ERROR_INTERRUPT		REG_BIT(3)
+#define   GT_RENDER_USER_INTERRUPT		REG_BIT(0)
+
+#define PVC_GT0_PACKAGE_ENERGY_STATUS		XE_REG(0x281004)
+#define PVC_GT0_PACKAGE_RAPL_LIMIT		XE_REG(0x281008)
+#define PVC_GT0_PACKAGE_POWER_SKU_UNIT		XE_REG(0x281068)
+#define PVC_GT0_PLATFORM_ENERGY_STATUS		XE_REG(0x28106c)
+#define PVC_GT0_PACKAGE_POWER_SKU		XE_REG(0x281080)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
new file mode 100644
index 000000000000..92320bbc9d3d
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_REGS_H_
+#define _XE_GUC_REGS_H_
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include "regs/xe_reg_defs.h"
+
+/* Definitions of GuC H/W registers, bits, etc */
+
+#define DIST_DBS_POPULATED			XE_REG(0xd08)
+#define   DOORBELLS_PER_SQIDI_MASK		REG_GENMASK(23, 16)
+#define   SQIDIS_DOORBELL_EXIST_MASK		REG_GENMASK(15, 0)
+
+#define DRBREGL(x)				XE_REG(0x1000 + (x) * 8)
+#define   DRB_VALID				REG_BIT(0)
+#define DRBREGU(x)				XE_REG(0x1000 + (x) * 8 + 4)
+
+#define GTCR					XE_REG(0x4274)
+#define   GTCR_INVALIDATE			REG_BIT(0)
+
+#define GUC_ARAT_C6DIS				XE_REG(0xa178)
+
+#define GUC_STATUS				XE_REG(0xc000)
+#define   GS_AUTH_STATUS_MASK			REG_GENMASK(31, 30)
+#define   GS_AUTH_STATUS_BAD			REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x1)
+#define   GS_AUTH_STATUS_GOOD			REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x2)
+#define   GS_MIA_MASK				REG_GENMASK(18, 16)
+#define   GS_MIA_CORE_STATE			REG_FIELD_PREP(GS_MIA_MASK, 0x1)
+#define   GS_MIA_HALT_REQUESTED			REG_FIELD_PREP(GS_MIA_MASK, 0x2)
+#define   GS_MIA_ISR_ENTRY			REG_FIELD_PREP(GS_MIA_MASK, 0x4)
+#define   GS_UKERNEL_MASK			REG_GENMASK(15, 8)
+#define   GS_BOOTROM_MASK			REG_GENMASK(7, 1)
+#define   GS_BOOTROM_RSA_FAILED			REG_FIELD_PREP(GS_BOOTROM_MASK, 0x50)
+#define   GS_BOOTROM_JUMP_PASSED		REG_FIELD_PREP(GS_BOOTROM_MASK, 0x76)
+#define   GS_MIA_IN_RESET			REG_BIT(0)
+
+#define GUC_WOPCM_SIZE				XE_REG(0xc050)
+#define   GUC_WOPCM_SIZE_MASK			REG_GENMASK(31, 12)
+#define   GUC_WOPCM_SIZE_LOCKED			REG_BIT(0)
+
+#define GUC_SHIM_CONTROL			XE_REG(0xc064)
+#define   GUC_MOCS_INDEX_MASK			REG_GENMASK(27, 24)
+#define   GUC_SHIM_WC_ENABLE			REG_BIT(21)
+#define   GUC_ENABLE_MIA_CLOCK_GATING		REG_BIT(15)
+#define   GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA	REG_BIT(10)
+#define   GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA	REG_BIT(9)
+#define   GUC_MSGCH_ENABLE			REG_BIT(4)
+#define   GUC_ENABLE_MIA_CACHING		REG_BIT(2)
+#define   GUC_ENABLE_READ_CACHE_LOGIC		REG_BIT(1)
+#define   GUC_DISABLE_SRAM_INIT_TO_ZEROES	REG_BIT(0)
+
+#define SOFT_SCRATCH(n)				XE_REG(0xc180 + (n) * 4)
+#define SOFT_SCRATCH_COUNT			16
+
+#define HUC_KERNEL_LOAD_INFO			XE_REG(0xc1dc)
+#define   HUC_LOAD_SUCCESSFUL			REG_BIT(0)
+
+#define UOS_RSA_SCRATCH(i)			XE_REG(0xc200 + (i) * 4)
+#define UOS_RSA_SCRATCH_COUNT			64
+
+#define DMA_ADDR_0_LOW				XE_REG(0xc300)
+#define DMA_ADDR_0_HIGH				XE_REG(0xc304)
+#define DMA_ADDR_1_LOW				XE_REG(0xc308)
+#define DMA_ADDR_1_HIGH				XE_REG(0xc30c)
+#define   DMA_ADDR_SPACE_MASK			REG_GENMASK(20, 16)
+#define   DMA_ADDRESS_SPACE_WOPCM		REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 7)
+#define   DMA_ADDRESS_SPACE_GGTT		REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 8)
+#define DMA_COPY_SIZE				XE_REG(0xc310)
+#define DMA_CTRL				XE_REG(0xc314)
+#define   HUC_UKERNEL				REG_BIT(9)
+#define   UOS_MOVE				REG_BIT(4)
+#define   START_DMA				REG_BIT(0)
+#define DMA_GUC_WOPCM_OFFSET			XE_REG(0xc340)
+#define   GUC_WOPCM_OFFSET_SHIFT		14
+#define   GUC_WOPCM_OFFSET_MASK			REG_GENMASK(31, GUC_WOPCM_OFFSET_SHIFT)
+#define   HUC_LOADING_AGENT_GUC			REG_BIT(1)
+#define   GUC_WOPCM_OFFSET_VALID		REG_BIT(0)
+#define GUC_MAX_IDLE_COUNT			XE_REG(0xc3e4)
+
+#define GUC_SEND_INTERRUPT			XE_REG(0xc4c8)
+#define   GUC_SEND_TRIGGER			REG_BIT(0)
+
+#define GUC_BCS_RCS_IER				XE_REG(0xc550)
+#define GUC_VCS2_VCS1_IER			XE_REG(0xc554)
+#define GUC_WD_VECS_IER				XE_REG(0xc558)
+#define GUC_PM_P24C_IER				XE_REG(0xc55c)
+
+#define GUC_TLB_INV_CR				XE_REG(0xcee8)
+#define   GUC_TLB_INV_CR_INVALIDATE		REG_BIT(0)
+
+#define HUC_STATUS2				XE_REG(0xd3b0)
+#define   HUC_FW_VERIFIED			REG_BIT(7)
+
+#define GT_PM_CONFIG				XE_REG(0x13816c)
+#define   GT_DOORBELL_ENABLE			REG_BIT(0)
+
+#define GUC_HOST_INTERRUPT			XE_REG(0x1901f0)
+
+#define VF_SW_FLAG(n)				XE_REG(0x190240 + (n) * 4)
+#define VF_SW_FLAG_COUNT			4
+
+#define MED_GUC_HOST_INTERRUPT			XE_REG(0x190304)
+
+#define MED_VF_SW_FLAG(n)			XE_REG(0x190310 + (n) * 4)
+#define MED_VF_SW_FLAG_COUNT			4
+
+/* GuC Interrupt Vector */
+#define GUC_INTR_GUC2HOST			REG_BIT(15)
+#define GUC_INTR_EXEC_ERROR			REG_BIT(14)
+#define GUC_INTR_DISPLAY_EVENT			REG_BIT(13)
+#define GUC_INTR_SEM_SIG			REG_BIT(12)
+#define GUC_INTR_IOMMU2GUC			REG_BIT(11)
+#define GUC_INTR_DOORBELL_RANG			REG_BIT(10)
+#define GUC_INTR_DMA_DONE			REG_BIT(9)
+#define GUC_INTR_FATAL_ERROR			REG_BIT(8)
+#define GUC_INTR_NOTIF_ERROR			REG_BIT(7)
+#define GUC_INTR_SW_INT_6			REG_BIT(6)
+#define GUC_INTR_SW_INT_5			REG_BIT(5)
+#define GUC_INTR_SW_INT_4			REG_BIT(4)
+#define GUC_INTR_SW_INT_3			REG_BIT(3)
+#define GUC_INTR_SW_INT_2			REG_BIT(2)
+#define GUC_INTR_SW_INT_1			REG_BIT(1)
+#define GUC_INTR_SW_INT_0			REG_BIT(0)
+
+#define GUC_NUM_DOORBELLS			256
+
+/* format of the HW-monitored doorbell cacheline */
+struct guc_doorbell_info {
+	u32 db_status;
+#define GUC_DOORBELL_DISABLED			0
+#define GUC_DOORBELL_ENABLED			1
+
+	u32 cookie;
+	u32 reserved[14];
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
new file mode 100644
index 000000000000..4be81abc86ad
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_LRC_LAYOUT_H_
+#define _XE_LRC_LAYOUT_H_
+
+#define CTX_CONTEXT_CONTROL		(0x02 + 1)
+#define CTX_RING_HEAD			(0x04 + 1)
+#define CTX_RING_TAIL			(0x06 + 1)
+#define CTX_RING_START			(0x08 + 1)
+#define CTX_RING_CTL			(0x0a + 1)
+#define CTX_PDP0_UDW			(0x30 + 1)
+#define CTX_PDP0_LDW			(0x32 + 1)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
new file mode 100644
index 000000000000..519dd1067a19
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MCHBAR_REGS_H_
+#define _XE_MCHBAR_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * MCHBAR mirror.
+ *
+ * This mirrors the MCHBAR MMIO space whose location is determined by
+ * device 0 function 0's pci config register 0x44 or 0x48 and matches it in
+ * every way.
+ */
+
+#define MCHBAR_MIRROR_BASE_SNB			0x140000
+
+#define PCU_CR_PACKAGE_POWER_SKU		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5930)
+#define   PKG_TDP				GENMASK_ULL(14, 0)
+#define   PKG_MIN_PWR				GENMASK_ULL(30, 16)
+#define   PKG_MAX_PWR				GENMASK_ULL(46, 32)
+#define   PKG_MAX_WIN				GENMASK_ULL(54, 48)
+#define     PKG_MAX_WIN_X			GENMASK_ULL(54, 53)
+#define     PKG_MAX_WIN_Y			GENMASK_ULL(52, 48)
+
+
+#define PCU_CR_PACKAGE_POWER_SKU_UNIT		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5938)
+#define   PKG_PWR_UNIT				REG_GENMASK(3, 0)
+#define   PKG_ENERGY_UNIT			REG_GENMASK(12, 8)
+#define   PKG_TIME_UNIT				REG_GENMASK(19, 16)
+
+#define PCU_CR_PACKAGE_ENERGY_STATUS		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x593c)
+
+#define PCU_CR_PACKAGE_RAPL_LIMIT		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0)
+#define   PKG_PWR_LIM_1				REG_GENMASK(14, 0)
+#define   PKG_PWR_LIM_1_EN			REG_BIT(15)
+#define   PKG_PWR_LIM_1_TIME			REG_GENMASK(23, 17)
+#define   PKG_PWR_LIM_1_TIME_X			REG_GENMASK(23, 22)
+#define   PKG_PWR_LIM_1_TIME_Y			REG_GENMASK(21, 17)
+
+#endif /* _XE_MCHBAR_REGS_H_ */
diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
new file mode 100644
index 000000000000..c50e7650c09a
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_REG_DEFS_H_
+#define _XE_REG_DEFS_H_
+
+#include "compat-i915-headers/i915_reg_defs.h"
+
+/**
+ * struct xe_reg - Register definition
+ *
+ * Register defintion to be used by the individual register. Although the same
+ * definition is used for xe_reg and xe_reg_mcr, they use different internal
+ * APIs for accesses.
+ */
+struct xe_reg {
+	union {
+		struct {
+			/** @addr: address */
+			u32 addr:28;
+			/**
+			 * @masked: register is "masked", with upper 16bits used
+			 * to identify the bits that are updated on the lower
+			 * bits
+			 */
+			u32 masked:1;
+			/**
+			 * @mcr: register is multicast/replicated in the
+			 * hardware and needs special handling. Any register
+			 * with this set should also use a type of xe_reg_mcr_t.
+			 * It's only here so the few places that deal with MCR
+			 * registers specially (xe_sr.c) and tests using the raw
+			 * value can inspect it.
+			 */
+			u32 mcr:1;
+			/**
+			 * @ext: access MMIO extension space for current register.
+			 */
+			u32 ext:1;
+		};
+		/** @raw: Raw value with both address and options */
+		u32 raw;
+	};
+};
+
+/**
+ * struct xe_reg_mcr - MCR register definition
+ *
+ * MCR register is the same as a regular register, but uses another type since
+ * the internal API used for accessing them is different: it's never correct to
+ * use regular MMIO access.
+ */
+struct xe_reg_mcr {
+	/** @__reg: The register */
+	struct xe_reg __reg;
+};
+
+
+/**
+ * XE_REG_OPTION_MASKED - Register is "masked", with upper 16 bits marking the
+ * written bits on the lower 16 bits.
+ *
+ * It only applies to registers explicitly marked in bspec with
+ * "Access: Masked". Registers with this option can have write operations to
+ * specific lower bits by setting the corresponding upper bits. Other bits will
+ * not be affected. This allows register writes without needing a RMW cycle and
+ * without caching in software the register value.
+ *
+ * Example: a write with value 0x00010001 will set bit 0 and all other bits
+ * retain their previous values.
+ *
+ * To be used with XE_REG(). XE_REG_MCR() and XE_REG_INITIALIZER()
+ */
+#define XE_REG_OPTION_MASKED		.masked = 1
+
+/**
+ * XE_REG_INITIALIZER - Initializer for xe_reg_t.
+ * @r_: Register offset
+ * @...: Additional options like access mode. See struct xe_reg for available
+ *       options.
+ *
+ * Register field is mandatory, and additional options may be passed as
+ * arguments. Usually ``XE_REG()`` should be preferred since it creates an
+ * object of the right type. However when initializing static const storage,
+ * where a compound statement is not allowed, this can be used instead.
+ */
+#define XE_REG_INITIALIZER(r_, ...)    { .addr = r_, __VA_ARGS__ }
+
+
+/**
+ * XE_REG - Create a struct xe_reg from offset and additional flags
+ * @r_: Register offset
+ * @...: Additional options like access mode. See struct xe_reg for available
+ *       options.
+ */
+#define XE_REG(r_, ...)		((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__))
+
+/**
+ * XE_REG_EXT - Create a struct xe_reg from extension offset and additional
+ * flags
+ * @r_: Register extension offset
+ * @...: Additional options like access mode. See struct xe_reg for available
+ *       options.
+ */
+#define XE_REG_EXT(r_, ...)	\
+	((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .ext = 1))
+
+/**
+ * XE_REG_MCR - Create a struct xe_reg_mcr from offset and additional flags
+ * @r_: Register offset
+ * @...: Additional options like access mode. See struct xe_reg for available
+ *       options.
+ */
+#define XE_REG_MCR(r_, ...)	((const struct xe_reg_mcr){					\
+				 .__reg = XE_REG_INITIALIZER(r_,  ##__VA_ARGS__, .mcr = 1)	\
+				 })
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
new file mode 100644
index 000000000000..2c214bb9b671
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+#ifndef _XE_REGS_H_
+#define _XE_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+#define TIMESTAMP_OVERRIDE					XE_REG(0x44074)
+#define   TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	REG_GENMASK(15, 12)
+#define   TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK		REG_GENMASK(9, 0)
+
+#define PCU_IRQ_OFFSET				0x444e0
+#define GU_MISC_IRQ_OFFSET			0x444f0
+#define   GU_MISC_GSE				REG_BIT(27)
+
+#define SOFTWARE_FLAGS_SPR33			XE_REG(0x4f084)
+
+#define GU_CNTL_PROTECTED			XE_REG(0x10100C)
+#define   DRIVERINT_FLR_DIS			REG_BIT(31)
+
+#define GU_CNTL					XE_REG(0x101010)
+#define   LMEM_INIT				REG_BIT(7)
+#define   DRIVERFLR				REG_BIT(31)
+
+#define GU_DEBUG				XE_REG(0x101018)
+#define   DRIVERFLR_STATUS			REG_BIT(31)
+
+#define XEHP_CLOCK_GATE_DIS			XE_REG(0x101014)
+#define   SGSI_SIDECLK_DIS			REG_BIT(17)
+
+#define GGC					XE_REG(0x108040)
+#define   GMS_MASK				REG_GENMASK(15, 8)
+#define   GGMS_MASK				REG_GENMASK(7, 6)
+
+#define DSMBASE					XE_REG(0x1080C0)
+#define   BDSM_MASK				REG_GENMASK64(63, 20)
+
+#define GSMBASE					XE_REG(0x108100)
+
+#define STOLEN_RESERVED				XE_REG(0x1082c0)
+#define   WOPCM_SIZE_MASK			REG_GENMASK64(9, 7)
+
+#define MTL_RP_STATE_CAP			XE_REG(0x138000)
+
+#define MTL_GT_RPE_FREQUENCY			XE_REG(0x13800c)
+
+#define MTL_MEDIAP_STATE_CAP			XE_REG(0x138020)
+#define   MTL_RPN_CAP_MASK			REG_GENMASK(24, 16)
+#define   MTL_RP0_CAP_MASK			REG_GENMASK(8, 0)
+
+#define MTL_MPE_FREQUENCY			XE_REG(0x13802c)
+#define   MTL_RPE_MASK				REG_GENMASK(8, 0)
+
+#define DG1_MSTR_TILE_INTR			XE_REG(0x190008)
+#define   DG1_MSTR_IRQ				REG_BIT(31)
+#define   DG1_MSTR_TILE(t)			REG_BIT(t)
+
+#define GFX_MSTR_IRQ				XE_REG(0x190010)
+#define   MASTER_IRQ				REG_BIT(31)
+#define   GU_MISC_IRQ				REG_BIT(29)
+#define   DISPLAY_IRQ				REG_BIT(16)
+#define   GT_DW_IRQ(x)				REG_BIT(x)
+
+#define PVC_RP_STATE_CAP			XE_REG(0x281014)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
new file mode 100644
index 000000000000..58a4e0fad1e1
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _REGS_XE_SRIOV_REGS_H_
+#define _REGS_XE_SRIOV_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+#define XE2_LMEM_CFG			XE_REG(0x48b0)
+
+#define LMEM_CFG			XE_REG(0xcf58)
+#define   LMEM_EN			REG_BIT(31)
+#define   LMTT_DIR_PTR			REG_GENMASK(30, 0) /* in multiples of 64KB */
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
new file mode 100644
index 000000000000..39d8a0892274
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \
+	xe_bo_test.o \
+	xe_dma_buf_test.o \
+	xe_migrate_test.o \
+	xe_mocs_test.o \
+	xe_pci_test.o \
+	xe_rtp_test.o \
+	xe_wa_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
new file mode 100644
index 000000000000..412b2e7ce40c
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include "tests/xe_bo_test.h"
+#include "tests/xe_pci_test.h"
+#include "tests/xe_test.h"
+
+#include "xe_bo_evict.h"
+#include "xe_pci.h"
+#include "xe_pm.h"
+
+static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
+			    bool clear, u64 get_val, u64 assign_val,
+			    struct kunit *test)
+{
+	struct dma_fence *fence;
+	struct ttm_tt *ttm;
+	struct page *page;
+	pgoff_t ccs_page;
+	long timeout;
+	u64 *cpu_map;
+	int ret;
+	u32 offset;
+
+	/* Move bo to VRAM if not already there. */
+	ret = xe_bo_validate(bo, NULL, false);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate bo.\n");
+		return ret;
+	}
+
+	/* Optionally clear bo *and* CCS data in VRAM. */
+	if (clear) {
+		fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource);
+		if (IS_ERR(fence)) {
+			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
+			return PTR_ERR(fence);
+		}
+		dma_fence_put(fence);
+	}
+
+	/* Evict to system. CCS data should be copied. */
+	ret = xe_bo_evict(bo, true);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to evict bo.\n");
+		return ret;
+	}
+
+	/* Sync all migration blits */
+	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+					DMA_RESV_USAGE_KERNEL,
+					true,
+					5 * HZ);
+	if (timeout <= 0) {
+		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
+		return -ETIME;
+	}
+
+	/*
+	 * Bo with CCS data is now in system memory. Verify backing store
+	 * and data integrity. Then assign for the next testing round while
+	 * we still have a CPU map.
+	 */
+	ttm = bo->ttm.ttm;
+	if (!ttm || !ttm_tt_is_populated(ttm)) {
+		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
+		return -EINVAL;
+	}
+
+	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
+	if (ccs_page >= ttm->num_pages) {
+		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
+		return -EINVAL;
+	}
+
+	page = ttm->pages[ccs_page];
+	cpu_map = kmap_local_page(page);
+
+	/* Check first CCS value */
+	if (cpu_map[0] != get_val) {
+		KUNIT_FAIL(test,
+			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
+			   (unsigned long long)get_val,
+			   (unsigned long long)cpu_map[0]);
+		ret = -EINVAL;
+	}
+
+	/* Check last CCS value, or at least last value in page. */
+	offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
+	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
+	if (cpu_map[offset] != get_val) {
+		KUNIT_FAIL(test,
+			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
+			   (unsigned long long)get_val,
+			   (unsigned long long)cpu_map[offset]);
+		ret = -EINVAL;
+	}
+
+	cpu_map[0] = assign_val;
+	cpu_map[offset] = assign_val;
+	kunmap_local(cpu_map);
+
+	return ret;
+}
+
+static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
+			      struct kunit *test)
+{
+	struct xe_bo *bo;
+
+	int ret;
+
+	/* TODO: Sanity check */
+	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
+
+	if (IS_DGFX(xe))
+		kunit_info(test, "Testing vram id %u\n", tile->id);
+	else
+		kunit_info(test, "Testing system memory\n");
+
+	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device, bo_flags);
+
+	xe_bo_lock(bo, false);
+
+	if (IS_ERR(bo)) {
+		KUNIT_FAIL(test, "Failed to create bo.\n");
+		return;
+	}
+
+	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
+	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
+			       test);
+	if (ret)
+		goto out_unlock;
+
+	kunit_info(test, "Verifying that CCS data survives migration.\n");
+	ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
+			       0xdeadbeefdeadbeefULL, test);
+	if (ret)
+		goto out_unlock;
+
+	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
+	ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);
+
+out_unlock:
+	xe_bo_unlock(bo);
+	xe_bo_put(bo);
+}
+
+static int ccs_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_tile *tile;
+	int id;
+
+	if (!xe_device_has_flat_ccs(xe)) {
+		kunit_info(test, "Skipping non-flat-ccs device.\n");
+		return 0;
+	}
+
+	xe_device_mem_access_get(xe);
+
+	for_each_tile(tile, xe, id) {
+		/* For igfx run only for primary tile */
+		if (!IS_DGFX(xe) && id > 0)
+			continue;
+		ccs_test_run_tile(xe, tile, test);
+	}
+
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+void xe_ccs_migrate_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(ccs_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);
+
+static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
+{
+	struct xe_bo *bo, *external;
+	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
+	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
+	struct xe_gt *__gt;
+	int err, i, id;
+
+	kunit_info(test, "Testing device %s vram id %u\n",
+		   dev_name(xe->drm.dev), tile->id);
+
+	for (i = 0; i < 2; ++i) {
+		xe_vm_lock(vm, false);
+		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
+				       DRM_XE_GEM_CPU_CACHING_WC,
+				       ttm_bo_type_device,
+				       bo_flags);
+		xe_vm_unlock(vm);
+		if (IS_ERR(bo)) {
+			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
+			break;
+		}
+
+		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
+					     DRM_XE_GEM_CPU_CACHING_WC,
+					     ttm_bo_type_device, bo_flags);
+		if (IS_ERR(external)) {
+			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
+			goto cleanup_bo;
+		}
+
+		xe_bo_lock(external, false);
+		err = xe_bo_pin_external(external);
+		xe_bo_unlock(external);
+		if (err) {
+			KUNIT_FAIL(test, "external bo pin err=%pe\n",
+				   ERR_PTR(err));
+			goto cleanup_external;
+		}
+
+		err = xe_bo_evict_all(xe);
+		if (err) {
+			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
+			goto cleanup_all;
+		}
+
+		for_each_gt(__gt, xe, id)
+			xe_gt_sanitize(__gt);
+		err = xe_bo_restore_kernel(xe);
+		/*
+		 * Snapshotting the CTB and copying back a potentially old
+		 * version seems risky, depending on what might have been
+		 * inflight. Also it seems snapshotting the ADS object and
+		 * copying back results in serious breakage. Normally when
+		 * calling xe_bo_restore_kernel() we always fully restart the
+		 * GT, which re-intializes such things.  We could potentially
+		 * skip saving and restoring such objects in xe_bo_evict_all()
+		 * however seems quite fragile not to also restart the GT. Try
+		 * to do that here by triggering a GT reset.
+		 */
+		for_each_gt(__gt, xe, id) {
+			xe_gt_reset_async(__gt);
+			flush_work(&__gt->reset.worker);
+		}
+		if (err) {
+			KUNIT_FAIL(test, "restore kernel err=%pe\n",
+				   ERR_PTR(err));
+			goto cleanup_all;
+		}
+
+		err = xe_bo_restore_user(xe);
+		if (err) {
+			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
+			goto cleanup_all;
+		}
+
+		if (!xe_bo_is_vram(external)) {
+			KUNIT_FAIL(test, "external bo is not vram\n");
+			err = -EPROTO;
+			goto cleanup_all;
+		}
+
+		if (xe_bo_is_vram(bo)) {
+			KUNIT_FAIL(test, "bo is vram\n");
+			err = -EPROTO;
+			goto cleanup_all;
+		}
+
+		if (i) {
+			down_read(&vm->lock);
+			xe_vm_lock(vm, false);
+			err = xe_bo_validate(bo, bo->vm, false);
+			xe_vm_unlock(vm);
+			up_read(&vm->lock);
+			if (err) {
+				KUNIT_FAIL(test, "bo valid err=%pe\n",
+					   ERR_PTR(err));
+				goto cleanup_all;
+			}
+			xe_bo_lock(external, false);
+			err = xe_bo_validate(external, NULL, false);
+			xe_bo_unlock(external);
+			if (err) {
+				KUNIT_FAIL(test, "external bo valid err=%pe\n",
+					   ERR_PTR(err));
+				goto cleanup_all;
+			}
+		}
+
+		xe_bo_lock(external, false);
+		xe_bo_unpin_external(external);
+		xe_bo_unlock(external);
+
+		xe_bo_put(external);
+
+		xe_bo_lock(bo, false);
+		__xe_bo_unset_bulk_move(bo);
+		xe_bo_unlock(bo);
+		xe_bo_put(bo);
+		continue;
+
+cleanup_all:
+		xe_bo_lock(external, false);
+		xe_bo_unpin_external(external);
+		xe_bo_unlock(external);
+cleanup_external:
+		xe_bo_put(external);
+cleanup_bo:
+		xe_bo_lock(bo, false);
+		__xe_bo_unset_bulk_move(bo);
+		xe_bo_unlock(bo);
+		xe_bo_put(bo);
+		break;
+	}
+
+	xe_vm_put(vm);
+
+	return 0;
+}
+
+static int evict_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_tile *tile;
+	int id;
+
+	if (!IS_DGFX(xe)) {
+		kunit_info(test, "Skipping non-discrete device %s.\n",
+			   dev_name(xe->drm.dev));
+		return 0;
+	}
+
+	xe_device_mem_access_get(xe);
+
+	for_each_tile(tile, xe, id)
+		evict_test_run_tile(xe, tile, test);
+
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+void xe_bo_evict_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(evict_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c
new file mode 100644
index 000000000000..f408f17f2164
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo_test.h"
+
+#include <kunit/test.h>
+
+static struct kunit_case xe_bo_tests[] = {
+	KUNIT_CASE(xe_ccs_migrate_kunit),
+	KUNIT_CASE(xe_bo_evict_kunit),
+	{}
+};
+
+static struct kunit_suite xe_bo_test_suite = {
+	.name = "xe_bo",
+	.test_cases = xe_bo_tests,
+};
+
+kunit_test_suite(xe_bo_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_bo kunit test");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.h b/drivers/gpu/drm/xe/tests/xe_bo_test.h
new file mode 100644
index 000000000000..0113ab45066a
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_BO_TEST_H_
+#define _XE_BO_TEST_H_
+
+struct kunit;
+
+void xe_ccs_migrate_kunit(struct kunit *test);
+void xe_bo_evict_kunit(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
new file mode 100644
index 000000000000..9f6d571d7fa9
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/xe_drm.h>
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include "tests/xe_dma_buf_test.h"
+#include "tests/xe_pci_test.h"
+
+#include "xe_pci.h"
+
+static bool p2p_enabled(struct dma_buf_test_params *params)
+{
+	return IS_ENABLED(CONFIG_PCI_P2PDMA) && params->attach_ops &&
+		params->attach_ops->allow_peer2peer;
+}
+
+static bool is_dynamic(struct dma_buf_test_params *params)
+{
+	return IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY) && params->attach_ops &&
+		params->attach_ops->move_notify;
+}
+
+static void check_residency(struct kunit *test, struct xe_bo *exported,
+			    struct xe_bo *imported, struct dma_buf *dmabuf)
+{
+	struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
+	u32 mem_type;
+	int ret;
+
+	xe_bo_assert_held(exported);
+	xe_bo_assert_held(imported);
+
+	mem_type = XE_PL_VRAM0;
+	if (!(params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+		/* No VRAM allowed */
+		mem_type = XE_PL_TT;
+	else if (params->force_different_devices && !p2p_enabled(params))
+		/* No P2P */
+		mem_type = XE_PL_TT;
+	else if (params->force_different_devices && !is_dynamic(params) &&
+		 (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT))
+		/* Pin migrated to TT */
+		mem_type = XE_PL_TT;
+
+	if (!xe_bo_is_mem_type(exported, mem_type)) {
+		KUNIT_FAIL(test, "Exported bo was not in expected memory type.\n");
+		return;
+	}
+
+	if (xe_bo_is_pinned(exported))
+		return;
+
+	/*
+	 * Evict exporter. Note that the gem object dma_buf member isn't
+	 * set from xe_gem_prime_export(), and it's needed for the move_notify()
+	 * functionality, so hack that up here. Evicting the exported bo will
+	 * evict also the imported bo through the move_notify() functionality if
+	 * importer is on a different device. If they're on the same device,
+	 * the exporter and the importer should be the same bo.
+	 */
+	swap(exported->ttm.base.dma_buf, dmabuf);
+	ret = xe_bo_evict(exported, true);
+	swap(exported->ttm.base.dma_buf, dmabuf);
+	if (ret) {
+		if (ret != -EINTR && ret != -ERESTARTSYS)
+			KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n",
+				   ret);
+		return;
+	}
+
+	/* Verify that also importer has been evicted to SYSTEM */
+	if (exported != imported && !xe_bo_is_mem_type(imported, XE_PL_SYSTEM)) {
+		KUNIT_FAIL(test, "Importer wasn't properly evicted.\n");
+		return;
+	}
+
+	/* Re-validate the importer. This should move also exporter in. */
+	ret = xe_bo_validate(imported, NULL, false);
+	if (ret) {
+		if (ret != -EINTR && ret != -ERESTARTSYS)
+			KUNIT_FAIL(test, "Validating importer failed with err=%d.\n",
+				   ret);
+		return;
+	}
+
+	/*
+	 * If on different devices, the exporter is kept in system  if
+	 * possible, saving a migration step as the transfer is just
+	 * likely as fast from system memory.
+	 */
+	if (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)
+		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT));
+	else
+		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
+
+	if (params->force_different_devices)
+		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT));
+	else
+		KUNIT_EXPECT_TRUE(test, exported == imported);
+}
+
+static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
+	struct drm_gem_object *import;
+	struct dma_buf *dmabuf;
+	struct xe_bo *bo;
+	size_t size;
+
+	/* No VRAM on this device? */
+	if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) &&
+	    (params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+		return;
+
+	size = PAGE_SIZE;
+	if ((params->mem_mask & XE_BO_CREATE_VRAM0_BIT) &&
+	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+		size = SZ_64K;
+
+	kunit_info(test, "running %s\n", __func__);
+	bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device, XE_BO_CREATE_USER_BIT | params->mem_mask);
+	if (IS_ERR(bo)) {
+		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+			   PTR_ERR(bo));
+		return;
+	}
+
+	dmabuf = xe_gem_prime_export(&bo->ttm.base, 0);
+	if (IS_ERR(dmabuf)) {
+		KUNIT_FAIL(test, "xe_gem_prime_export() failed with err=%ld\n",
+			   PTR_ERR(dmabuf));
+		goto out;
+	}
+
+	import = xe_gem_prime_import(&xe->drm, dmabuf);
+	if (!IS_ERR(import)) {
+		struct xe_bo *import_bo = gem_to_xe_bo(import);
+
+		/*
+		 * Did import succeed when it shouldn't due to lack of p2p support?
+		 */
+		if (params->force_different_devices &&
+		    !p2p_enabled(params) &&
+		    !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) {
+			KUNIT_FAIL(test,
+				   "xe_gem_prime_import() succeeded when it shouldn't have\n");
+		} else {
+			int err;
+
+			/* Is everything where we expect it to be? */
+			xe_bo_lock(import_bo, false);
+			err = xe_bo_validate(import_bo, NULL, false);
+
+			/* Pinning in VRAM is not allowed. */
+			if (!is_dynamic(params) &&
+			    params->force_different_devices &&
+			    !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT))
+				KUNIT_EXPECT_EQ(test, err, -EINVAL);
+			/* Otherwise only expect interrupts or success. */
+			else if (err && err != -EINTR && err != -ERESTARTSYS)
+				KUNIT_EXPECT_TRUE(test, !err || err == -EINTR ||
+						  err == -ERESTARTSYS);
+
+			if (!err)
+				check_residency(test, bo, import_bo, dmabuf);
+			xe_bo_unlock(import_bo);
+		}
+		drm_gem_object_put(import);
+	} else if (PTR_ERR(import) != -EOPNOTSUPP) {
+		/* Unexpected error code. */
+		KUNIT_FAIL(test,
+			   "xe_gem_prime_import failed with the wrong err=%ld\n",
+			   PTR_ERR(import));
+	} else if (!params->force_different_devices ||
+		   p2p_enabled(params) ||
+		   (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) {
+		/* Shouldn't fail if we can reuse same bo, use p2p or use system */
+		KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n",
+			   PTR_ERR(import));
+	}
+	dma_buf_put(dmabuf);
+out:
+	drm_gem_object_put(&bo->ttm.base);
+}
+
+static const struct dma_buf_attach_ops nop2p_attach_ops = {
+	.allow_peer2peer = false,
+	.move_notify = xe_dma_buf_move_notify
+};
+
+/*
+ * We test the implementation with bos of different residency and with
+ * importers with different capabilities; some lacking p2p support and some
+ * lacking dynamic capabilities (attach_ops == NULL). We also fake
+ * different devices avoiding the import shortcut that just reuses the same
+ * gem object.
+ */
+static const struct dma_buf_test_params test_params[] = {
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops},
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops},
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT},
+	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &nop2p_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .attach_ops = &nop2p_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &xe_dma_buf_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .attach_ops = &nop2p_attach_ops,
+	 .force_different_devices = true},
+
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT},
+	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	 .force_different_devices = true},
+
+	{}
+};
+
+static int dma_buf_run_device(struct xe_device *xe)
+{
+	const struct dma_buf_test_params *params;
+	struct kunit *test = xe_cur_kunit();
+
+	for (params = test_params; params->mem_mask; ++params) {
+		struct dma_buf_test_params p = *params;
+
+		p.base.id = XE_TEST_LIVE_DMA_BUF;
+		test->priv = &p;
+		xe_test_dmabuf_import_same_driver(xe);
+	}
+
+	/* A non-zero return would halt iteration over driver devices */
+	return 0;
+}
+
+void xe_dma_buf_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(dma_buf_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_dma_buf_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
new file mode 100644
index 000000000000..9f5a9cda8c0f
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_dma_buf_test.h"
+
+#include <kunit/test.h>
+
+static struct kunit_case xe_dma_buf_tests[] = {
+	KUNIT_CASE(xe_dma_buf_kunit),
+	{}
+};
+
+static struct kunit_suite xe_dma_buf_test_suite = {
+	.name = "xe_dma_buf",
+	.test_cases = xe_dma_buf_tests,
+};
+
+kunit_test_suite(xe_dma_buf_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_dma_buf kunit test");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h
new file mode 100644
index 000000000000..e6b464ddd526
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DMA_BUF_TEST_H_
+#define _XE_DMA_BUF_TEST_H_
+
+struct kunit;
+
+void xe_dma_buf_kunit(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_lmtt_test.c b/drivers/gpu/drm/xe/tests/xe_lmtt_test.c
new file mode 100644
index 000000000000..1f1557c45ae1
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_lmtt_test.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+static const struct lmtt_ops_param {
+	const char *desc;
+	const struct xe_lmtt_ops *ops;
+} lmtt_ops_params[] = {
+	{ "2-level", &lmtt_2l_ops, },
+	{ "multi-level", &lmtt_ml_ops, },
+};
+
+static void lmtt_ops_param_get_desc(const struct lmtt_ops_param *p, char *desc)
+{
+	snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s", p->desc);
+}
+
+KUNIT_ARRAY_PARAM(lmtt_ops, lmtt_ops_params, lmtt_ops_param_get_desc);
+
+static void test_ops(struct kunit *test)
+{
+	const struct lmtt_ops_param *p = test->param_value;
+	const struct xe_lmtt_ops *ops = p->ops;
+	unsigned int n;
+
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_root_pd_level);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_num);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_size);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_shift);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_index);
+	KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_encode);
+
+	KUNIT_EXPECT_NE(test, ops->lmtt_root_pd_level(), 0);
+
+	for (n = 0; n <= ops->lmtt_root_pd_level(); n++) {
+		KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_num(n), 0,
+				    "level=%u", n);
+		KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_size(n), 0,
+				    "level=%u", n);
+		KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_encode(0, n), LMTT_PTE_INVALID,
+				    "level=%u", n);
+	}
+
+	for (n = 0; n < ops->lmtt_root_pd_level(); n++) {
+		u64 addr = BIT_ULL(ops->lmtt_pte_shift(n));
+
+		KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_shift(n), 0,
+				    "level=%u", n);
+		KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr - 1, n), 0,
+				    "addr=%#llx level=%u", addr, n);
+		KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr + 1, n), 1,
+				    "addr=%#llx level=%u", addr, n);
+		KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr * 2 - 1, n), 1,
+				    "addr=%#llx level=%u", addr, n);
+		KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr * 2, n), 2,
+				    "addr=%#llx level=%u", addr, n);
+	}
+}
+
+static struct kunit_case lmtt_test_cases[] = {
+	KUNIT_CASE_PARAM(test_ops, lmtt_ops_gen_params),
+	{}
+};
+
+static struct kunit_suite lmtt_suite = {
+	.name = "lmtt",
+	.test_cases = lmtt_test_cases,
+};
+
+kunit_test_suites(&lmtt_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
new file mode 100644
index 000000000000..7a32faa2f688
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020-2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include "tests/xe_migrate_test.h"
+#include "tests/xe_pci_test.h"
+
+#include "xe_pci.h"
+
+static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence,
+				const char *str, struct kunit *test)
+{
+	long ret;
+
+	if (IS_ERR(fence)) {
+		KUNIT_FAIL(test, "Failed to create fence for %s: %li\n", str,
+			   PTR_ERR(fence));
+		return true;
+	}
+	if (!fence)
+		return true;
+
+	ret = dma_fence_wait_timeout(fence, false, 5 * HZ);
+	if (ret <= 0) {
+		KUNIT_FAIL(test, "Fence timed out for %s: %li\n", str, ret);
+		return true;
+	}
+
+	return false;
+}
+
+static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
+			  struct xe_bb *bb, u32 second_idx, const char *str,
+			  struct kunit *test)
+{
+	u64 batch_base = xe_migrate_batch_base(m, xe->info.has_usm);
+	struct xe_sched_job *job = xe_bb_create_migration_job(m->q, bb,
+							      batch_base,
+							      second_idx);
+	struct dma_fence *fence;
+
+	if (IS_ERR(job)) {
+		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
+			   PTR_ERR(job));
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	if (sanity_fence_failed(xe, fence, str, test))
+		return -ETIMEDOUT;
+
+	dma_fence_put(fence);
+	kunit_info(test, "%s: Job completed\n", str);
+	return 0;
+}
+
+static void
+sanity_populate_cb(struct xe_migrate_pt_update *pt_update,
+		   struct xe_tile *tile, struct iosys_map *map, void *dst,
+		   u32 qword_ofs, u32 num_qwords,
+		   const struct xe_vm_pgtable_update *update)
+{
+	struct migrate_test_params *p =
+		to_migrate_test_params(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));
+	int i;
+	u64 *ptr = dst;
+	u64 value;
+
+	for (i = 0; i < num_qwords; i++) {
+		value = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL;
+		if (map)
+			xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
+				  sizeof(u64), u64, value);
+		else
+			ptr[i] = value;
+	}
+
+	kunit_info(xe_cur_kunit(), "Used %s.\n", map ? "CPU" : "GPU");
+	if (p->force_gpu && map)
+		KUNIT_FAIL(xe_cur_kunit(), "GPU pagetable update used CPU.\n");
+}
+
+static const struct xe_migrate_pt_update_ops sanity_ops = {
+	.populate = sanity_populate_cb,
+};
+
+#define check(_retval, _expected, str, _test)				\
+	do { if ((_retval) != (_expected)) {				\
+			KUNIT_FAIL(_test, "Sanity check failed: " str	\
+				   " expected %llx, got %llx\n",	\
+				   (u64)(_expected), (u64)(_retval));	\
+		} } while (0)
+
+static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
+		      struct kunit *test, u32 region)
+{
+	struct xe_device *xe = tile_to_xe(m->tile);
+	u64 retval, expected = 0;
+	bool big = bo->size >= SZ_2M;
+	struct dma_fence *fence;
+	const char *str = big ? "Copying big bo" : "Copying small bo";
+	int err;
+
+	struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL,
+						   bo->size,
+						   ttm_bo_type_kernel,
+						   region |
+						   XE_BO_NEEDS_CPU_ACCESS);
+	if (IS_ERR(remote)) {
+		KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %li\n",
+			   str, PTR_ERR(remote));
+		return;
+	}
+
+	err = xe_bo_validate(remote, NULL, false);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to validate system bo for %s: %li\n",
+			   str, err);
+		goto out_unlock;
+	}
+
+	err = xe_bo_vmap(remote);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to vmap system bo for %s: %li\n",
+			   str, err);
+		goto out_unlock;
+	}
+
+	xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size);
+	fence = xe_migrate_clear(m, remote, remote->ttm.resource);
+	if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" :
+				 "Clearing remote small bo", test)) {
+		retval = xe_map_rd(xe, &remote->vmap, 0, u64);
+		check(retval, expected, "remote first offset should be cleared",
+		      test);
+		retval = xe_map_rd(xe, &remote->vmap, remote->size - 8, u64);
+		check(retval, expected, "remote last offset should be cleared",
+		      test);
+	}
+	dma_fence_put(fence);
+
+	/* Try to copy 0xc0 from remote to vram with 2MB or 64KiB/4KiB pages */
+	xe_map_memset(xe, &remote->vmap, 0, 0xc0, remote->size);
+	xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size);
+
+	expected = 0xc0c0c0c0c0c0c0c0;
+	fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource,
+				bo->ttm.resource, false);
+	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo remote -> vram" :
+				 "Copying small bo remote -> vram", test)) {
+		retval = xe_map_rd(xe, &bo->vmap, 0, u64);
+		check(retval, expected,
+		      "remote -> vram bo first offset should be copied", test);
+		retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64);
+		check(retval, expected,
+		      "remote -> vram bo offset should be copied", test);
+	}
+	dma_fence_put(fence);
+
+	/* And other way around.. slightly hacky.. */
+	xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size);
+	xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size);
+
+	fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource,
+				remote->ttm.resource, false);
+	if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> remote" :
+				 "Copying small bo vram -> remote", test)) {
+		retval = xe_map_rd(xe, &remote->vmap, 0, u64);
+		check(retval, expected,
+		      "vram -> remote bo first offset should be copied", test);
+		retval = xe_map_rd(xe, &remote->vmap, bo->size - 8, u64);
+		check(retval, expected,
+		      "vram -> remote bo last offset should be copied", test);
+	}
+	dma_fence_put(fence);
+
+	xe_bo_vunmap(remote);
+out_unlock:
+	xe_bo_unlock(remote);
+	xe_bo_put(remote);
+}
+
+static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo,
+			     struct kunit *test)
+{
+	test_copy(m, bo, test, XE_BO_CREATE_SYSTEM_BIT);
+}
+
+static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
+			   struct kunit *test)
+{
+	u32 region;
+
+	if (bo->ttm.resource->mem_type == XE_PL_SYSTEM)
+		return;
+
+	if (bo->ttm.resource->mem_type == XE_PL_VRAM0)
+		region = XE_BO_CREATE_VRAM1_BIT;
+	else
+		region = XE_BO_CREATE_VRAM0_BIT;
+	test_copy(m, bo, test, region);
+}
+
+static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
+			   struct kunit *test, bool force_gpu)
+{
+	struct xe_device *xe = tile_to_xe(m->tile);
+	struct dma_fence *fence;
+	u64 retval, expected;
+	ktime_t then, now;
+	int i;
+
+	struct xe_vm_pgtable_update update = {
+		.ofs = 1,
+		.qwords = 0x10,
+		.pt_bo = pt,
+	};
+	struct xe_migrate_pt_update pt_update = {
+		.ops = &sanity_ops,
+	};
+	struct migrate_test_params p = {
+		.base.id = XE_TEST_LIVE_MIGRATE,
+		.force_gpu = force_gpu,
+	};
+
+	test->priv = &p;
+	/* Test xe_migrate_update_pgtables() updates the pagetable as expected */
+	expected = 0xf0f0f0f0f0f0f0f0ULL;
+	xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size);
+
+	then = ktime_get();
+	fence = xe_migrate_update_pgtables(m, m->q->vm, NULL, m->q, &update, 1,
+					   NULL, 0, &pt_update);
+	now = ktime_get();
+	if (sanity_fence_failed(xe, fence, "Migration pagetable update", test))
+		return;
+
+	kunit_info(test, "Updating without syncing took %llu us,\n",
+		   (unsigned long long)ktime_to_us(ktime_sub(now, then)));
+
+	dma_fence_put(fence);
+	retval = xe_map_rd(xe, &pt->vmap, 0, u64);
+	check(retval, expected, "PTE[0] must stay untouched", test);
+
+	for (i = 0; i < update.qwords; i++) {
+		retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64);
+		check(retval, i * 0x1111111111111111ULL, "PTE update", test);
+	}
+
+	retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords),
+			   u64);
+	check(retval, expected, "PTE[0x11] must stay untouched", test);
+}
+
+static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
+{
+	struct xe_tile *tile = m->tile;
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_bo *pt, *bo = m->pt_bo, *big, *tiny;
+	struct xe_res_cursor src_it;
+	struct dma_fence *fence;
+	u64 retval, expected;
+	struct xe_bb *bb;
+	int err;
+	u8 id = tile->id;
+
+	err = xe_bo_vmap(bo);
+	if (err) {
+		KUNIT_FAIL(test, "Failed to vmap our pagetables: %li\n",
+			   PTR_ERR(bo));
+		return;
+	}
+
+	big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M,
+				   ttm_bo_type_kernel,
+				   XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				   XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(big)) {
+		KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
+		goto vunmap;
+	}
+
+	pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(pt)) {
+		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
+			   PTR_ERR(pt));
+		goto free_big;
+	}
+
+	tiny = xe_bo_create_pin_map(xe, tile, m->q->vm,
+				    2 * SZ_4K,
+				    ttm_bo_type_kernel,
+				    XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				    XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(tiny)) {
+		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
+			   PTR_ERR(pt));
+		goto free_pt;
+	}
+
+	bb = xe_bb_new(tile->primary_gt, 32, xe->info.has_usm);
+	if (IS_ERR(bb)) {
+		KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n",
+			   PTR_ERR(bb));
+		goto free_tiny;
+	}
+
+	kunit_info(test, "Starting tests, top level PT addr: %lx, special pagetable base addr: %lx\n",
+		   (unsigned long)xe_bo_main_addr(m->q->vm->pt_root[id]->bo, XE_PAGE_SIZE),
+		   (unsigned long)xe_bo_main_addr(m->pt_bo, XE_PAGE_SIZE));
+
+	/* First part of the test, are we updating our pagetable bo with a new entry? */
+	xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64,
+		  0xdeaddeadbeefbeef);
+	expected = m->q->vm->pt_ops->pte_encode_bo(pt, 0, xe->pat.idx[XE_CACHE_WB], 0);
+	if (m->q->vm->flags & XE_VM_FLAG_64K)
+		expected |= XE_PTE_PS64;
+	if (xe_bo_is_vram(pt))
+		xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
+	else
+		xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it);
+
+	emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false,
+		 &src_it, XE_PAGE_SIZE, pt);
+
+	run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test);
+
+	retval = xe_map_rd(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1),
+			   u64);
+	check(retval, expected, "PTE entry write", test);
+
+	/* Now try to write data to our newly mapped 'pagetable', see if it succeeds */
+	bb->len = 0;
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+	xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead);
+	expected = 0;
+
+	emit_clear(tile->primary_gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4,
+		   IS_DGFX(xe));
+	run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable",
+		       test);
+
+	retval = xe_map_rd(xe, &pt->vmap, 0, u32);
+	check(retval, expected, "Write to PT after adding PTE", test);
+
+	/* Sanity checks passed, try the full ones! */
+
+	/* Clear a small bo */
+	kunit_info(test, "Clearing small buffer object\n");
+	xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size);
+	expected = 0;
+	fence = xe_migrate_clear(m, tiny, tiny->ttm.resource);
+	if (sanity_fence_failed(xe, fence, "Clearing small bo", test))
+		goto out;
+
+	dma_fence_put(fence);
+	retval = xe_map_rd(xe, &tiny->vmap, 0, u32);
+	check(retval, expected, "Command clear small first value", test);
+	retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32);
+	check(retval, expected, "Command clear small last value", test);
+
+	kunit_info(test, "Copying small buffer object to system\n");
+	test_copy_sysmem(m, tiny, test);
+	if (xe->info.tile_count > 1) {
+		kunit_info(test, "Copying small buffer object to other vram\n");
+		test_copy_vram(m, tiny, test);
+	}
+
+	/* Clear a big bo */
+	kunit_info(test, "Clearing big buffer object\n");
+	xe_map_memset(xe, &big->vmap, 0, 0x11, big->size);
+	expected = 0;
+	fence = xe_migrate_clear(m, big, big->ttm.resource);
+	if (sanity_fence_failed(xe, fence, "Clearing big bo", test))
+		goto out;
+
+	dma_fence_put(fence);
+	retval = xe_map_rd(xe, &big->vmap, 0, u32);
+	check(retval, expected, "Command clear big first value", test);
+	retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32);
+	check(retval, expected, "Command clear big last value", test);
+
+	kunit_info(test, "Copying big buffer object to system\n");
+	test_copy_sysmem(m, big, test);
+	if (xe->info.tile_count > 1) {
+		kunit_info(test, "Copying big buffer object to other vram\n");
+		test_copy_vram(m, big, test);
+	}
+
+	kunit_info(test, "Testing page table update using CPU if GPU idle.\n");
+	test_pt_update(m, pt, test, false);
+	kunit_info(test, "Testing page table update using GPU\n");
+	test_pt_update(m, pt, test, true);
+
+out:
+	xe_bb_free(bb, NULL);
+free_tiny:
+	xe_bo_unpin(tiny);
+	xe_bo_put(tiny);
+free_pt:
+	xe_bo_unpin(pt);
+	xe_bo_put(pt);
+free_big:
+	xe_bo_unpin(big);
+	xe_bo_put(big);
+vunmap:
+	xe_bo_vunmap(m->pt_bo);
+}
+
+static int migrate_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	struct xe_tile *tile;
+	int id;
+
+	for_each_tile(tile, xe, id) {
+		struct xe_migrate *m = tile->migrate;
+
+		kunit_info(test, "Testing tile id %d.\n", id);
+		xe_vm_lock(m->q->vm, true);
+		xe_device_mem_access_get(xe);
+		xe_migrate_sanity_test(m, test);
+		xe_device_mem_access_put(xe);
+		xe_vm_unlock(m->q->vm);
+	}
+
+	return 0;
+}
+
+void xe_migrate_sanity_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(migrate_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
new file mode 100644
index 000000000000..cf0c173b945f
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_migrate_test.h"
+
+#include <kunit/test.h>
+
+static struct kunit_case xe_migrate_tests[] = {
+	KUNIT_CASE(xe_migrate_sanity_kunit),
+	{}
+};
+
+static struct kunit_suite xe_migrate_test_suite = {
+	.name = "xe_migrate",
+	.test_cases = xe_migrate_tests,
+};
+
+kunit_test_suite(xe_migrate_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_migrate kunit test");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
new file mode 100644
index 000000000000..7c645c66824f
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MIGRATE_TEST_H_
+#define _XE_MIGRATE_TEST_H_
+
+struct kunit;
+
+void xe_migrate_sanity_kunit(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
new file mode 100644
index 000000000000..7dd34f94e809
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include "tests/xe_mocs_test.h"
+#include "tests/xe_pci_test.h"
+#include "tests/xe_test.h"
+
+#include "xe_pci.h"
+#include "xe_gt.h"
+#include "xe_mocs.h"
+#include "xe_device.h"
+
+struct live_mocs {
+	struct xe_mocs_info table;
+};
+
+static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt)
+{
+	unsigned int flags;
+	struct kunit *test = xe_cur_kunit();
+
+	memset(arg, 0, sizeof(*arg));
+
+	flags = get_mocs_settings(gt_to_xe(gt), &arg->table);
+
+	kunit_info(test, "table size %d", arg->table.size);
+	kunit_info(test, "table uc_index %d", arg->table.uc_index);
+	kunit_info(test, "table n_entries %d", arg->table.n_entries);
+
+	return flags;
+}
+
+static void read_l3cc_table(struct xe_gt *gt,
+			    const struct xe_mocs_info *info)
+{
+	unsigned int i;
+	u32 l3cc;
+	u32 reg_val;
+	u32 ret;
+
+	struct kunit *test = xe_cur_kunit();
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
+	mocs_dbg(&gt_to_xe(gt)->drm, "L3CC entries:%d\n", info->n_entries);
+	for (i = 0;
+	     i < (info->n_entries + 1) / 2 ?
+	     (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
+				  get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
+	     i++) {
+		if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+			reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
+		else
+			reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
+		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i,
+			 XELP_LNCFCMOCS(i).addr, reg_val, l3cc);
+		if (reg_val != l3cc)
+			KUNIT_FAIL(test, "l3cc reg 0x%x has incorrect val.\n",
+				   XELP_LNCFCMOCS(i).addr);
+	}
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	xe_device_mem_access_put(gt_to_xe(gt));
+}
+
+static void read_mocs_table(struct xe_gt *gt,
+			    const struct xe_mocs_info *info)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	unsigned int i;
+	u32 mocs;
+	u32 reg_val;
+	u32 ret;
+
+	struct kunit *test = xe_cur_kunit();
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
+	mocs_dbg(&gt_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries);
+	drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
+		      "Unused entries index should have been defined\n");
+	for (i = 0;
+	     i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
+	     i++) {
+		if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+			reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
+		else
+			reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
+		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i,
+			 XELP_GLOBAL_MOCS(i).addr, reg_val, mocs);
+		if (reg_val != mocs)
+			KUNIT_FAIL(test, "mocs reg 0x%x has incorrect val.\n",
+				   XELP_GLOBAL_MOCS(i).addr);
+	}
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	xe_device_mem_access_put(gt_to_xe(gt));
+}
+
+static int mocs_kernel_test_run_device(struct xe_device *xe)
+{
+	/* Basic check the system is configured with the expected mocs table */
+
+	struct live_mocs mocs;
+	struct xe_gt *gt;
+
+	unsigned int flags;
+	int id;
+
+	for_each_gt(gt, xe, id) {
+		flags = live_mocs_init(&mocs, gt);
+		if (flags & HAS_GLOBAL_MOCS)
+			read_mocs_table(gt, &mocs.table);
+		if (flags & HAS_LNCF_MOCS)
+			read_l3cc_table(gt, &mocs.table);
+	}
+	return 0;
+}
+
+void xe_live_mocs_kernel_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(mocs_kernel_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_kernel_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
new file mode 100644
index 000000000000..ef56bd517b28
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_mocs_test.h"
+
+#include <kunit/test.h>
+
+static struct kunit_case xe_mocs_tests[] = {
+	KUNIT_CASE(xe_live_mocs_kernel_kunit),
+	{}
+};
+
+static struct kunit_suite xe_mocs_test_suite = {
+	.name = "xe_mocs",
+	.test_cases = xe_mocs_tests,
+};
+
+kunit_test_suite(xe_mocs_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.h b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
new file mode 100644
index 000000000000..7faa3575e6c3
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MOCS_TEST_H_
+#define _XE_MOCS_TEST_H_
+
+struct kunit;
+
+void xe_live_mocs_kernel_kunit(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
new file mode 100644
index 000000000000..602793644f61
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "tests/xe_pci_test.h"
+
+#include "tests/xe_test.h"
+
+#include <kunit/test-bug.h>
+#include <kunit/test.h>
+#include <kunit/test-bug.h>
+#include <kunit/visibility.h>
+
+struct kunit_test_data {
+	int ndevs;
+	xe_device_fn xe_fn;
+};
+
+static int dev_to_xe_device_fn(struct device *dev, void *__data)
+
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct kunit_test_data *data = __data;
+	int ret = 0;
+	int idx;
+
+	data->ndevs++;
+
+	if (drm_dev_enter(drm, &idx))
+		ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev)));
+	drm_dev_exit(idx);
+
+	return ret;
+}
+
+/**
+ * xe_call_for_each_device - Iterate over all devices this driver binds to
+ * @xe_fn: Function to call for each device.
+ *
+ * This function iterated over all devices this driver binds to, and calls
+ * @xe_fn: for each one of them. If the called function returns anything else
+ * than 0, iteration is stopped and the return value is returned by this
+ * function. Across each function call, drm_dev_enter() / drm_dev_exit() is
+ * called for the corresponding drm device.
+ *
+ * Return: Number of devices iterated or
+ *         the error code of a call to @xe_fn returning an error code.
+ */
+int xe_call_for_each_device(xe_device_fn xe_fn)
+{
+	int ret;
+	struct kunit_test_data data = {
+	    .xe_fn = xe_fn,
+	    .ndevs = 0,
+	};
+
+	ret = driver_for_each_device(&xe_pci_driver.driver, NULL,
+				     &data, dev_to_xe_device_fn);
+
+	if (!data.ndevs)
+		kunit_skip(current->kunit_test, "test runs only on hardware\n");
+
+	return ret ?: data.ndevs;
+}
+
+/**
+ * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs
+ * @xe_fn: Function to call for each device.
+ *
+ * This function iterates over the descriptors for all graphics IPs recognized
+ * by the driver and calls @xe_fn: for each one of them.
+ */
+void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn)
+{
+	const struct xe_graphics_desc *ip, *last = NULL;
+
+	for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) {
+		ip = graphics_ip_map[i].ip;
+		if (ip == last)
+			continue;
+
+		xe_fn(ip);
+		last = ip;
+	}
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_graphics_ip);
+
+/**
+ * xe_call_for_each_media_ip - Iterate over all recognized media IPs
+ * @xe_fn: Function to call for each device.
+ *
+ * This function iterates over the descriptors for all media IPs recognized
+ * by the driver and calls @xe_fn: for each one of them.
+ */
+void xe_call_for_each_media_ip(xe_media_fn xe_fn)
+{
+	const struct xe_media_desc *ip, *last = NULL;
+
+	for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) {
+		ip = media_ip_map[i].ip;
+		if (ip == last)
+			continue;
+
+		xe_fn(ip);
+		last = ip;
+	}
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip);
+
+static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type,
+			    u32 *ver, u32 *revid)
+{
+	struct kunit *test = kunit_get_current_test();
+	struct xe_pci_fake_data *data = test->priv;
+
+	if (type == GMDID_MEDIA) {
+		*ver = data->media_verx100;
+		*revid = xe_step_to_gmdid(data->media_step);
+	} else {
+		*ver = data->graphics_verx100;
+		*revid = xe_step_to_gmdid(data->graphics_step);
+	}
+}
+
+int xe_pci_fake_device_init(struct xe_device *xe)
+{
+	struct kunit *test = kunit_get_current_test();
+	struct xe_pci_fake_data *data = test->priv;
+	const struct pci_device_id *ent = pciidlist;
+	const struct xe_device_desc *desc;
+	const struct xe_subplatform_desc *subplatform_desc;
+
+	if (!data) {
+		desc = (const void *)ent->driver_data;
+		subplatform_desc = NULL;
+		goto done;
+	}
+
+	for (ent = pciidlist; ent->device; ent++) {
+		desc = (const void *)ent->driver_data;
+		if (desc->platform == data->platform)
+			break;
+	}
+
+	if (!ent->device)
+		return -ENODEV;
+
+	for (subplatform_desc = desc->subplatforms;
+	     subplatform_desc && subplatform_desc->subplatform;
+	     subplatform_desc++)
+		if (subplatform_desc->subplatform == data->subplatform)
+			break;
+
+	if (data->subplatform != XE_SUBPLATFORM_NONE && !subplatform_desc)
+		return -ENODEV;
+
+done:
+	kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid);
+
+	xe_info_init_early(xe, desc, subplatform_desc);
+	xe_info_init(xe, desc->graphics, desc->media);
+
+	return 0;
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init);
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c
new file mode 100644
index 000000000000..171e4180f1aa
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_drv.h>
+#include <drm/drm_kunit_helpers.h>
+
+#include <kunit/test.h>
+
+#include "tests/xe_test.h"
+
+#include "xe_device.h"
+#include "xe_pci_test.h"
+#include "xe_pci_types.h"
+
+static void check_graphics_ip(const struct xe_graphics_desc *graphics)
+{
+	struct kunit *test = xe_cur_kunit();
+	u64 mask = graphics->hw_engine_mask;
+
+	/* RCS, CCS, and BCS engines are allowed on the graphics IP */
+	mask &= ~(XE_HW_ENGINE_RCS_MASK |
+		  XE_HW_ENGINE_CCS_MASK |
+		  XE_HW_ENGINE_BCS_MASK);
+
+	/* Any remaining engines are an error */
+	KUNIT_ASSERT_EQ(test, mask, 0);
+}
+
+static void check_media_ip(const struct xe_media_desc *media)
+{
+	struct kunit *test = xe_cur_kunit();
+	u64 mask = media->hw_engine_mask;
+
+	/* VCS, VECS and GSCCS engines are allowed on the media IP */
+	mask &= ~(XE_HW_ENGINE_VCS_MASK |
+		  XE_HW_ENGINE_VECS_MASK |
+		  XE_HW_ENGINE_GSCCS_MASK);
+
+	/* Any remaining engines are an error */
+	KUNIT_ASSERT_EQ(test, mask, 0);
+}
+
+static void xe_gmdid_graphics_ip(struct kunit *test)
+{
+	xe_call_for_each_graphics_ip(check_graphics_ip);
+}
+
+static void xe_gmdid_media_ip(struct kunit *test)
+{
+	xe_call_for_each_media_ip(check_media_ip);
+}
+
+static struct kunit_case xe_pci_tests[] = {
+	KUNIT_CASE(xe_gmdid_graphics_ip),
+	KUNIT_CASE(xe_gmdid_media_ip),
+	{}
+};
+
+static struct kunit_suite xe_pci_test_suite = {
+	.name = "xe_pci",
+	.test_cases = xe_pci_tests,
+};
+
+kunit_test_suite(xe_pci_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_pci kunit test");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
new file mode 100644
index 000000000000..811ffe5bd9fd
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PCI_TEST_H_
+#define _XE_PCI_TEST_H_
+
+#include <linux/types.h>
+
+#include "xe_platform_types.h"
+
+struct xe_device;
+struct xe_graphics_desc;
+struct xe_media_desc;
+
+typedef int (*xe_device_fn)(struct xe_device *);
+typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *);
+typedef void (*xe_media_fn)(const struct xe_media_desc *);
+
+int xe_call_for_each_device(xe_device_fn xe_fn);
+void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn);
+void xe_call_for_each_media_ip(xe_media_fn xe_fn);
+
+struct xe_pci_fake_data {
+	enum xe_platform platform;
+	enum xe_subplatform subplatform;
+	u32 graphics_verx100;
+	u32 media_verx100;
+	u32 graphics_step;
+	u32 media_step;
+};
+
+int xe_pci_fake_device_init(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
new file mode 100644
index 000000000000..4a6972897675
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/string.h>
+#include <linux/xarray.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_kunit_helpers.h>
+
+#include <kunit/test.h>
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_reg_defs.h"
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_pci_test.h"
+#include "xe_reg_sr.h"
+#include "xe_rtp.h"
+
+#define REGULAR_REG1	XE_REG(1)
+#define REGULAR_REG2	XE_REG(2)
+#define REGULAR_REG3	XE_REG(3)
+#define MCR_REG1	XE_REG_MCR(1)
+#define MCR_REG2	XE_REG_MCR(2)
+#define MCR_REG3	XE_REG_MCR(3)
+#define MASKED_REG1	XE_REG(1, XE_REG_OPTION_MASKED)
+
+#undef XE_REG_MCR
+#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
+
+struct rtp_test_case {
+	const char *name;
+	struct xe_reg expected_reg;
+	u32 expected_set_bits;
+	u32 expected_clr_bits;
+	unsigned long expected_count;
+	unsigned int expected_sr_errors;
+	const struct xe_rtp_entry_sr *entries;
+};
+
+static bool match_yes(const struct xe_gt *gt, const struct xe_hw_engine *hwe)
+{
+	return true;
+}
+
+static bool match_no(const struct xe_gt *gt, const struct xe_hw_engine *hwe)
+{
+	return false;
+}
+
+static const struct rtp_test_case cases[] = {
+	{
+		.name = "coalesce-same-reg",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0) | REG_BIT(1),
+		.expected_clr_bits = REG_BIT(0) | REG_BIT(1),
+		.expected_count = 1,
+		/* Different bits on the same register: create a single entry */
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "no-match-no-add",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		/* Don't coalesce second entry since rules don't match */
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_no)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "no-match-no-add-multiple-rules",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		/* Don't coalesce second entry due to one of the rules */
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes), FUNC(match_no)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "two-regs-two-entries",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 2,
+		/* Same bits on different registers are not coalesced */
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG2, REG_BIT(0)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "clr-one-set-other",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(1) | REG_BIT(0),
+		.expected_count = 1,
+		/* Check clr vs set actions on different bits */
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(CLR(REGULAR_REG1, REG_BIT(1)))
+			},
+			{}
+		},
+	},
+	{
+#define TEMP_MASK	REG_GENMASK(10, 8)
+#define TEMP_FIELD	REG_FIELD_PREP(TEMP_MASK, 2)
+		.name = "set-field",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = TEMP_FIELD,
+		.expected_clr_bits = TEMP_MASK,
+		.expected_count = 1,
+		/* Check FIELD_SET works */
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(FIELD_SET(REGULAR_REG1,
+						   TEMP_MASK, TEMP_FIELD))
+			},
+			{}
+		},
+#undef TEMP_MASK
+#undef TEMP_FIELD
+	},
+	{
+		.name = "conflict-duplicate",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		.expected_sr_errors = 1,
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			/* drop: setting same values twice */
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "conflict-not-disjoint",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		.expected_sr_errors = 1,
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			/* drop: bits are not disjoint with previous entries */
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(CLR(REGULAR_REG1, REG_GENMASK(1, 0)))
+			},
+			{}
+		},
+	},
+	{
+		.name = "conflict-reg-type",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0),
+		.expected_clr_bits = REG_BIT(0),
+		.expected_count = 1,
+		.expected_sr_errors = 2,
+		.entries = (const struct xe_rtp_entry_sr[]) {
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0)))
+			},
+			/* drop: regular vs MCR */
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(MCR_REG1, REG_BIT(1)))
+			},
+			/* drop: regular vs masked */
+			{ XE_RTP_NAME("basic-3"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(SET(MASKED_REG1, REG_BIT(0)))
+			},
+			{}
+		},
+	},
+};
+
+static void xe_rtp_process_tests(struct kunit *test)
+{
+	const struct rtp_test_case *param = test->param_value;
+	struct xe_device *xe = test->priv;
+	struct xe_gt *gt = xe_device_get_root_tile(xe)->primary_gt;
+	struct xe_reg_sr *reg_sr = &gt->reg_sr;
+	const struct xe_reg_sr_entry *sre, *sr_entry = NULL;
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+	unsigned long idx, count = 0;
+
+	xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe);
+	xe_rtp_process_to_sr(&ctx, param->entries, reg_sr);
+
+	xa_for_each(&reg_sr->xa, idx, sre) {
+		if (idx == param->expected_reg.addr)
+			sr_entry = sre;
+
+		count++;
+	}
+
+	KUNIT_EXPECT_EQ(test, count, param->expected_count);
+	KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits);
+	KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits);
+	KUNIT_EXPECT_EQ(test, sr_entry->reg.raw, param->expected_reg.raw);
+	KUNIT_EXPECT_EQ(test, reg_sr->errors, param->expected_sr_errors);
+}
+
+static void rtp_desc(const struct rtp_test_case *t, char *desc)
+{
+	strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(rtp, cases, rtp_desc);
+
+static int xe_rtp_test_init(struct kunit *test)
+{
+	struct xe_device *xe;
+	struct device *dev;
+	int ret;
+
+	dev = drm_kunit_helper_alloc_device(test);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+	xe = drm_kunit_helper_alloc_drm_device(test, dev,
+					       struct xe_device,
+					       drm, DRIVER_GEM);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
+
+	/* Initialize an empty device */
+	test->priv = NULL;
+	ret = xe_pci_fake_device_init(xe);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	xe->drm.dev = dev;
+	test->priv = xe;
+
+	return 0;
+}
+
+static void xe_rtp_test_exit(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+
+	drm_kunit_helper_free_device(test, xe->drm.dev);
+}
+
+static struct kunit_case xe_rtp_tests[] = {
+	KUNIT_CASE_PARAM(xe_rtp_process_tests, rtp_gen_params),
+	{}
+};
+
+static struct kunit_suite xe_rtp_test_suite = {
+	.name = "xe_rtp",
+	.init = xe_rtp_test_init,
+	.exit = xe_rtp_test_exit,
+	.test_cases = xe_rtp_tests,
+};
+
+kunit_test_suite(xe_rtp_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_rtp kunit test");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h
new file mode 100644
index 000000000000..7a1ae213e750
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_test.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TEST_H_
+#define _XE_TEST_H_
+
+#include <linux/types.h>
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include <linux/sched.h>
+#include <kunit/test.h>
+
+/*
+ * Each test that provides a kunit private test structure, place a test id
+ * here and point the kunit->priv to an embedded struct xe_test_priv.
+ */
+enum xe_test_priv_id {
+	XE_TEST_LIVE_DMA_BUF,
+	XE_TEST_LIVE_MIGRATE,
+};
+
+/**
+ * struct xe_test_priv - Base class for test private info
+ * @id: enum xe_test_priv_id to identify the subclass.
+ */
+struct xe_test_priv {
+	enum xe_test_priv_id id;
+};
+
+#define XE_TEST_DECLARE(x) x
+#define XE_TEST_ONLY(x) unlikely(x)
+#define XE_TEST_EXPORT
+#define xe_cur_kunit() current->kunit_test
+
+/**
+ * xe_cur_kunit_priv - Obtain the struct xe_test_priv pointed to by
+ * current->kunit->priv if it exists and is embedded in the expected subclass.
+ * @id: Id of the expected subclass.
+ *
+ * Return: NULL if the process is not a kunit test, and NULL if the
+ * current kunit->priv pointer is not pointing to an object of the expected
+ * subclass. A pointer to the embedded struct xe_test_priv otherwise.
+ */
+static inline struct xe_test_priv *
+xe_cur_kunit_priv(enum xe_test_priv_id id)
+{
+	struct xe_test_priv *priv;
+
+	if (!xe_cur_kunit())
+		return NULL;
+
+	priv = xe_cur_kunit()->priv;
+	return priv->id == id ? priv : NULL;
+}
+
+#else /* if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) */
+
+#define XE_TEST_DECLARE(x)
+#define XE_TEST_ONLY(x) 0
+#define XE_TEST_EXPORT static
+#define xe_cur_kunit() NULL
+#define xe_cur_kunit_priv(_id) NULL
+
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
new file mode 100644
index 000000000000..a53c22a19582
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_drv.h>
+#include <drm/drm_kunit_helpers.h>
+
+#include <kunit/test.h>
+
+#include "xe_device.h"
+#include "xe_pci_test.h"
+#include "xe_reg_sr.h"
+#include "xe_tuning.h"
+#include "xe_wa.h"
+
+struct platform_test_case {
+	const char *name;
+	enum xe_platform platform;
+	enum xe_subplatform subplatform;
+	u32 graphics_verx100;
+	u32 media_verx100;
+	struct xe_step_info step;
+};
+
+#define PLATFORM_CASE(platform__, graphics_step__)				\
+	{									\
+		.name = #platform__ " (" #graphics_step__ ")",			\
+		.platform = XE_ ## platform__,					\
+		.subplatform = XE_SUBPLATFORM_NONE,				\
+		.step = { .graphics = STEP_ ## graphics_step__ }		\
+	}
+
+
+#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__)			\
+	{										\
+		.name = #platform__ "_" #subplatform__ " (" #graphics_step__ ")",	\
+		.platform = XE_ ## platform__,						\
+		.subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__,	\
+		.step = { .graphics = STEP_ ## graphics_step__ }			\
+	}
+
+#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__,		\
+		   media_verx100__, media_step__)				\
+	{									\
+		.name = #platform__ " (g:" #graphics_step__ ", m:" #media_step__ ")",\
+		.platform = XE_ ## platform__,					\
+		.subplatform = XE_SUBPLATFORM_NONE,				\
+		.graphics_verx100 = graphics_verx100__,				\
+		.media_verx100 = media_verx100__,				\
+		.step = { .graphics = STEP_ ## graphics_step__,			\
+			   .media = STEP_ ## media_step__ }			\
+	}
+
+static const struct platform_test_case cases[] = {
+	PLATFORM_CASE(TIGERLAKE, B0),
+	PLATFORM_CASE(DG1, A0),
+	PLATFORM_CASE(DG1, B0),
+	PLATFORM_CASE(ALDERLAKE_S, A0),
+	PLATFORM_CASE(ALDERLAKE_S, B0),
+	PLATFORM_CASE(ALDERLAKE_S, C0),
+	PLATFORM_CASE(ALDERLAKE_S, D0),
+	PLATFORM_CASE(ALDERLAKE_P, A0),
+	PLATFORM_CASE(ALDERLAKE_P, B0),
+	PLATFORM_CASE(ALDERLAKE_P, C0),
+	SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0),
+	SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0),
+	SUBPLATFORM_CASE(DG2, G10, A0),
+	SUBPLATFORM_CASE(DG2, G10, A1),
+	SUBPLATFORM_CASE(DG2, G10, B0),
+	SUBPLATFORM_CASE(DG2, G10, C0),
+	SUBPLATFORM_CASE(DG2, G11, A0),
+	SUBPLATFORM_CASE(DG2, G11, B0),
+	SUBPLATFORM_CASE(DG2, G11, B1),
+	SUBPLATFORM_CASE(DG2, G12, A0),
+	SUBPLATFORM_CASE(DG2, G12, A1),
+	PLATFORM_CASE(PVC, B0),
+	PLATFORM_CASE(PVC, B1),
+	PLATFORM_CASE(PVC, C0),
+	GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0),
+	GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0),
+	GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
+	GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0),
+};
+
+static void platform_desc(const struct platform_test_case *t, char *desc)
+{
+	strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(platform, cases, platform_desc);
+
+static int xe_wa_test_init(struct kunit *test)
+{
+	const struct platform_test_case *param = test->param_value;
+	struct xe_pci_fake_data data = {
+		.platform = param->platform,
+		.subplatform = param->subplatform,
+		.graphics_verx100 = param->graphics_verx100,
+		.media_verx100 = param->media_verx100,
+		.graphics_step = param->step.graphics,
+		.media_step = param->step.media,
+	};
+	struct xe_device *xe;
+	struct device *dev;
+	int ret;
+
+	dev = drm_kunit_helper_alloc_device(test);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+	xe = drm_kunit_helper_alloc_drm_device(test, dev,
+					       struct xe_device,
+					       drm, DRIVER_GEM);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
+
+	test->priv = &data;
+	ret = xe_pci_fake_device_init(xe);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	if (!param->graphics_verx100)
+		xe->info.step = param->step;
+
+	/* TODO: init hw engines for engine/LRC WAs */
+	xe->drm.dev = dev;
+	test->priv = xe;
+
+	return 0;
+}
+
+static void xe_wa_test_exit(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+
+	drm_kunit_helper_free_device(test, xe->drm.dev);
+}
+
+static void xe_wa_gt(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	struct xe_gt *gt;
+	int id;
+
+	for_each_gt(gt, xe, id) {
+		xe_reg_sr_init(&gt->reg_sr, "GT", xe);
+
+		xe_wa_process_gt(gt);
+		xe_tuning_process_gt(gt);
+
+		KUNIT_ASSERT_EQ(test, gt->reg_sr.errors, 0);
+	}
+}
+
+static struct kunit_case xe_wa_tests[] = {
+	KUNIT_CASE_PARAM(xe_wa_gt, platform_gen_params),
+	{}
+};
+
+static struct kunit_suite xe_rtp_test_suite = {
+	.name = "xe_wa",
+	.init = xe_wa_test_init,
+	.exit = xe_wa_test_exit,
+	.test_cases = xe_wa_tests,
+};
+
+kunit_test_suite(xe_rtp_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_wa kunit test");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h
new file mode 100644
index 000000000000..34c142e6cfb0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_assert.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_ASSERT_H_
+#define _XE_ASSERT_H_
+
+#include <linux/string_helpers.h>
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+#include "xe_step.h"
+
+/**
+ * DOC: Xe ASSERTs
+ *
+ * While Xe driver aims to be simpler than legacy i915 driver it is still
+ * complex enough that some changes introduced while adding new functionality
+ * could break the existing code.
+ *
+ * Adding &drm_WARN or &drm_err to catch unwanted programming usage could lead
+ * to undesired increased driver footprint and may impact production driver
+ * performance as this additional code will be always present.
+ *
+ * To allow annotate functions with additional detailed debug checks to assert
+ * that all prerequisites are satisfied, without worrying about footprint or
+ * performance penalty on production builds where all potential misuses
+ * introduced during code integration were already fixed, we introduce family
+ * of Xe assert macros that try to follow classic assert() utility:
+ *
+ *  * xe_assert()
+ *  * xe_tile_assert()
+ *  * xe_gt_assert()
+ *
+ * These macros are implemented on top of &drm_WARN, but unlikely to the origin,
+ * warning is triggered when provided condition is false. Additionally all above
+ * assert macros cannot be used in expressions or as a condition, since
+ * underlying code will be compiled out on non-debug builds.
+ *
+ * Note that these macros are not intended for use to cover known gaps in the
+ * implementation; for such cases use regular &drm_WARN or &drm_err and provide
+ * valid safe fallback.
+ *
+ * Also in cases where performance or footprint is not an issue, developers
+ * should continue to use the regular &drm_WARN or &drm_err to ensure that bug
+ * reports from production builds will contain meaningful diagnostics data.
+ *
+ * Below code shows how asserts could help in debug to catch unplanned use::
+ *
+ *	static void one_igfx(struct xe_device *xe)
+ *	{
+ *		xe_assert(xe, xe->info.is_dgfx == false);
+ *		xe_assert(xe, xe->info.tile_count == 1);
+ *	}
+ *
+ *	static void two_dgfx(struct xe_device *xe)
+ *	{
+ *		xe_assert(xe, xe->info.is_dgfx);
+ *		xe_assert(xe, xe->info.tile_count == 2);
+ *	}
+ *
+ *	void foo(struct xe_device *xe)
+ *	{
+ *		if (xe->info.dgfx)
+ *			return two_dgfx(xe);
+ *		return one_igfx(xe);
+ *	}
+ *
+ *	void bar(struct xe_device *xe)
+ *	{
+ *		if (drm_WARN_ON(xe->drm, xe->info.tile_count > 2))
+ *			return;
+ *
+ *		if (xe->info.tile_count == 2)
+ *			return two_dgfx(xe);
+ *		return one_igfx(xe);
+ *	}
+ */
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+#define __xe_assert_msg(xe, condition, msg, arg...) ({						\
+	(void)drm_WARN(&(xe)->drm, !(condition), "[" DRM_NAME "] Assertion `%s` failed!\n" msg,	\
+		       __stringify(condition), ## arg);						\
+})
+#else
+#define __xe_assert_msg(xe, condition, msg, arg...) ({						\
+	typecheck(const struct xe_device *, xe);						\
+	BUILD_BUG_ON_INVALID(condition);							\
+})
+#endif
+
+/**
+ * xe_assert - warn if condition is false when debugging.
+ * @xe: the &struct xe_device pointer to which &condition applies
+ * @condition: condition to check
+ *
+ * xe_assert() uses &drm_WARN to emit a warning and print additional information
+ * that could be read from the &xe pointer if provided &condition is false.
+ *
+ * Contrary to &drm_WARN, xe_assert() is effective only on debug builds
+ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
+ * or as a condition.
+ *
+ * See `Xe ASSERTs`_ for general usage guidelines.
+ */
+#define xe_assert(xe, condition) xe_assert_msg((xe), condition, "")
+#define xe_assert_msg(xe, condition, msg, arg...) ({						\
+	const struct xe_device *__xe = (xe);							\
+	__xe_assert_msg(__xe, condition,							\
+			"platform: %d subplatform: %d\n"					\
+			"graphics: %s %u.%02u step %s\n"					\
+			"media: %s %u.%02u step %s\n"						\
+			msg,									\
+			__xe->info.platform, __xe->info.subplatform,				\
+			__xe->info.graphics_name,						\
+			__xe->info.graphics_verx100 / 100,					\
+			__xe->info.graphics_verx100 % 100,					\
+			xe_step_name(__xe->info.step.graphics),					\
+			__xe->info.media_name,							\
+			__xe->info.media_verx100 / 100,						\
+			__xe->info.media_verx100 % 100,						\
+			xe_step_name(__xe->info.step.media),					\
+			## arg);								\
+})
+
+/**
+ * xe_tile_assert - warn if condition is false when debugging.
+ * @tile: the &struct xe_tile pointer to which &condition applies
+ * @condition: condition to check
+ *
+ * xe_tile_assert() uses &drm_WARN to emit a warning and print additional
+ * information that could be read from the &tile pointer if provided &condition
+ * is false.
+ *
+ * Contrary to &drm_WARN, xe_tile_assert() is effective only on debug builds
+ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
+ * or as a condition.
+ *
+ * See `Xe ASSERTs`_ for general usage guidelines.
+ */
+#define xe_tile_assert(tile, condition) xe_tile_assert_msg((tile), condition, "")
+#define xe_tile_assert_msg(tile, condition, msg, arg...) ({					\
+	const struct xe_tile *__tile = (tile);							\
+	char __buf[10] __maybe_unused;								\
+	xe_assert_msg(tile_to_xe(__tile), condition, "tile: %u VRAM %s\n" msg,			\
+		      __tile->id, ({ string_get_size(__tile->mem.vram.actual_physical_size, 1,	\
+				     STRING_UNITS_2, __buf, sizeof(__buf)); __buf; }), ## arg);	\
+})
+
+/**
+ * xe_gt_assert - warn if condition is false when debugging.
+ * @gt: the &struct xe_gt pointer to which &condition applies
+ * @condition: condition to check
+ *
+ * xe_gt_assert() uses &drm_WARN to emit a warning and print additional
+ * information that could be safetely read from the &gt pointer if provided
+ * &condition is false.
+ *
+ * Contrary to &drm_WARN, xe_gt_assert() is effective only on debug builds
+ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions
+ * or as a condition.
+ *
+ * See `Xe ASSERTs`_ for general usage guidelines.
+ */
+#define xe_gt_assert(gt, condition) xe_gt_assert_msg((gt), condition, "")
+#define xe_gt_assert_msg(gt, condition, msg, arg...) ({						\
+	const struct xe_gt *__gt = (gt);							\
+	xe_tile_assert_msg(gt_to_tile(__gt), condition, "GT: %u type %d\n" msg,			\
+			   __gt->info.id, __gt->info.type, ## arg);				\
+})
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
new file mode 100644
index 000000000000..7c124475c428
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bb.h"
+
+#include "instructions/xe_mi_commands.h"
+#include "regs/xe_gpu_commands.h"
+#include "xe_device.h"
+#include "xe_exec_queue_types.h"
+#include "xe_gt.h"
+#include "xe_hw_fence.h"
+#include "xe_sa.h"
+#include "xe_sched_job.h"
+#include "xe_vm_types.h"
+
+static int bb_prefetch(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt))
+		/*
+		 * RCS and CCS require 1K, although other engines would be
+		 * okay with 512.
+		 */
+		return SZ_1K;
+	else
+		return SZ_512;
+}
+
+struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
+	int err;
+
+	if (!bb)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * We need to allocate space for the requested number of dwords,
+	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
+	 * space to accomodate the platform-specific hardware prefetch
+	 * requirements.
+	 */
+	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
+			      4 * (dwords + 1) + bb_prefetch(gt));
+	if (IS_ERR(bb->bo)) {
+		err = PTR_ERR(bb->bo);
+		goto err;
+	}
+
+	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
+	bb->len = 0;
+
+	return bb;
+err:
+	kfree(bb);
+	return ERR_PTR(err);
+}
+
+static struct xe_sched_job *
+__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
+{
+	u32 size = drm_suballoc_size(bb->bo);
+
+	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+
+	xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);
+
+	xe_sa_bo_flush_write(bb->bo);
+
+	return xe_sched_job_create(q, addr);
+}
+
+struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
+						struct xe_bb *bb,
+						u64 batch_base_ofs,
+						u32 second_idx)
+{
+	u64 addr[2] = {
+		batch_base_ofs + drm_suballoc_soffset(bb->bo),
+		batch_base_ofs + drm_suballoc_soffset(bb->bo) +
+		4 * second_idx,
+	};
+
+	xe_gt_assert(q->gt, second_idx <= bb->len);
+	xe_gt_assert(q->gt, q->vm->flags & XE_VM_FLAG_MIGRATION);
+
+	return __xe_bb_create_job(q, bb, addr);
+}
+
+struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
+				      struct xe_bb *bb)
+{
+	u64 addr = xe_sa_bo_gpu_addr(bb->bo);
+
+	xe_gt_assert(q->gt, !(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION));
+	return __xe_bb_create_job(q, bb, &addr);
+}
+
+void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
+{
+	if (!bb)
+		return;
+
+	xe_sa_bo_free(bb->bo, fence);
+	kfree(bb);
+}
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
new file mode 100644
index 000000000000..fafacd73dcc3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BB_H_
+#define _XE_BB_H_
+
+#include "xe_bb_types.h"
+
+struct dma_fence;
+
+struct xe_gt;
+struct xe_exec_queue;
+struct xe_sched_job;
+
+struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm);
+struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
+				      struct xe_bb *bb);
+struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
+						struct xe_bb *bb, u64 batch_ofs,
+						u32 second_idx);
+void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bb_types.h b/drivers/gpu/drm/xe/xe_bb_types.h
new file mode 100644
index 000000000000..b7d30308cf90
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb_types.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BB_TYPES_H_
+#define _XE_BB_TYPES_H_
+
+#include <linux/types.h>
+
+struct drm_suballoc;
+
+struct xe_bb {
+	struct drm_suballoc *bo;
+
+	u32 *cs;
+	u32 len; /* in dwords */
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
new file mode 100644
index 000000000000..8e4a3b1f6b93
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -0,0 +1,2269 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_bo.h"
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_dma_buf.h"
+#include "xe_drm_client.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_migrate.h"
+#include "xe_preempt_fence.h"
+#include "xe_res_cursor.h"
+#include "xe_trace.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_vm.h"
+
+static const struct ttm_place sys_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.mem_type = XE_PL_SYSTEM,
+	.flags = 0,
+};
+
+static struct ttm_placement sys_placement = {
+	.num_placement = 1,
+	.placement = &sys_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags,
+};
+
+static const struct ttm_place tt_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.mem_type = XE_PL_TT,
+	.flags = 0,
+};
+
+static struct ttm_placement tt_placement = {
+	.num_placement = 1,
+	.placement = &tt_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags,
+};
+
+bool mem_type_is_vram(u32 mem_type)
+{
+	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
+}
+
+static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
+{
+	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
+}
+
+static bool resource_is_vram(struct ttm_resource *res)
+{
+	return mem_type_is_vram(res->mem_type);
+}
+
+bool xe_bo_is_vram(struct xe_bo *bo)
+{
+	return resource_is_vram(bo->ttm.resource) ||
+		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
+}
+
+bool xe_bo_is_stolen(struct xe_bo *bo)
+{
+	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
+}
+
+/**
+ * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
+ * @bo: The BO
+ *
+ * The stolen memory is accessed through the PCI BAR for both DGFX and some
+ * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
+ *
+ * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
+ */
+bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
+{
+	return xe_bo_is_stolen(bo) &&
+		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
+}
+
+static bool xe_bo_is_user(struct xe_bo *bo)
+{
+	return bo->flags & XE_BO_CREATE_USER_BIT;
+}
+
+static struct xe_migrate *
+mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
+{
+	struct xe_tile *tile;
+
+	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
+	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
+	return tile->migrate;
+}
+
+static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
+{
+	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
+	struct ttm_resource_manager *mgr;
+
+	xe_assert(xe, resource_is_vram(res));
+	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
+	return to_xe_ttm_vram_mgr(mgr)->vram;
+}
+
+static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
+			   u32 bo_flags, u32 *c)
+{
+	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+
+	if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
+		bo->placements[*c] = (struct ttm_place) {
+			.mem_type = XE_PL_TT,
+		};
+		*c += 1;
+
+		if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
+			bo->props.preferred_mem_type = XE_PL_TT;
+	}
+}
+
+static void add_vram(struct xe_device *xe, struct xe_bo *bo,
+		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
+{
+	struct ttm_place place = { .mem_type = mem_type };
+	struct xe_mem_region *vram;
+	u64 io_size;
+
+	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
+	xe_assert(xe, vram && vram->usable_size);
+	io_size = vram->io_size;
+
+	/*
+	 * For eviction / restore on suspend / resume objects
+	 * pinned in VRAM must be contiguous
+	 */
+	if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
+			XE_BO_CREATE_GGTT_BIT))
+		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
+
+	if (io_size < vram->usable_size) {
+		if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
+			place.fpfn = 0;
+			place.lpfn = io_size >> PAGE_SHIFT;
+		} else {
+			place.flags |= TTM_PL_FLAG_TOPDOWN;
+		}
+	}
+	places[*c] = place;
+	*c += 1;
+
+	if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
+		bo->props.preferred_mem_type = mem_type;
+}
+
+static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
+			 u32 bo_flags, u32 *c)
+{
+	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+
+	if (bo->props.preferred_gt == XE_GT1) {
+		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
+			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
+		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
+			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
+	} else {
+		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
+			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
+		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
+			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
+	}
+}
+
+static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
+			   u32 bo_flags, u32 *c)
+{
+	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+
+	if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
+		bo->placements[*c] = (struct ttm_place) {
+			.mem_type = XE_PL_STOLEN,
+			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
+					     XE_BO_CREATE_GGTT_BIT) ?
+				TTM_PL_FLAG_CONTIGUOUS : 0,
+		};
+		*c += 1;
+	}
+}
+
+static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+				       u32 bo_flags)
+{
+	u32 c = 0;
+
+	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
+
+	/* The order of placements should indicate preferred location */
+
+	if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) {
+		try_add_system(xe, bo, bo_flags, &c);
+		try_add_vram(xe, bo, bo_flags, &c);
+	} else {
+		try_add_vram(xe, bo, bo_flags, &c);
+		try_add_system(xe, bo, bo_flags, &c);
+	}
+	try_add_stolen(xe, bo, bo_flags, &c);
+
+	if (!c)
+		return -EINVAL;
+
+	bo->placement = (struct ttm_placement) {
+		.num_placement = c,
+		.placement = bo->placements,
+		.num_busy_placement = c,
+		.busy_placement = bo->placements,
+	};
+
+	return 0;
+}
+
+int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+			      u32 bo_flags)
+{
+	xe_bo_assert_held(bo);
+	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
+}
+
+static void xe_evict_flags(struct ttm_buffer_object *tbo,
+			   struct ttm_placement *placement)
+{
+	if (!xe_bo_is_xe_bo(tbo)) {
+		/* Don't handle scatter gather BOs */
+		if (tbo->type == ttm_bo_type_sg) {
+			placement->num_placement = 0;
+			placement->num_busy_placement = 0;
+			return;
+		}
+
+		*placement = sys_placement;
+		return;
+	}
+
+	/*
+	 * For xe, sg bos that are evicted to system just triggers a
+	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
+	 */
+	switch (tbo->resource->mem_type) {
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+	case XE_PL_STOLEN:
+		*placement = tt_placement;
+		break;
+	case XE_PL_TT:
+	default:
+		*placement = sys_placement;
+		break;
+	}
+}
+
+struct xe_ttm_tt {
+	struct ttm_tt ttm;
+	struct device *dev;
+	struct sg_table sgt;
+	struct sg_table *sg;
+};
+
+static int xe_tt_map_sg(struct ttm_tt *tt)
+{
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+	unsigned long num_pages = tt->num_pages;
+	int ret;
+
+	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
+
+	if (xe_tt->sg)
+		return 0;
+
+	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
+						num_pages, 0,
+						(u64)num_pages << PAGE_SHIFT,
+						xe_sg_segment_size(xe_tt->dev),
+						GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	xe_tt->sg = &xe_tt->sgt;
+	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
+			      DMA_ATTR_SKIP_CPU_SYNC);
+	if (ret) {
+		sg_free_table(xe_tt->sg);
+		xe_tt->sg = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+struct sg_table *xe_bo_sg(struct xe_bo *bo)
+{
+	struct ttm_tt *tt = bo->ttm.ttm;
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+	return xe_tt->sg;
+}
+
+static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
+				       u32 page_flags)
+{
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_ttm_tt *tt;
+	unsigned long extra_pages;
+	enum ttm_caching caching;
+	int err;
+
+	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
+	if (!tt)
+		return NULL;
+
+	tt->dev = xe->drm.dev;
+
+	extra_pages = 0;
+	if (xe_bo_needs_ccs_pages(bo))
+		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
+					   PAGE_SIZE);
+
+	switch (bo->cpu_caching) {
+	case DRM_XE_GEM_CPU_CACHING_WC:
+		caching = ttm_write_combined;
+		break;
+	default:
+		caching = ttm_cached;
+		break;
+	}
+
+	WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching);
+
+	/*
+	 * Display scanout is always non-coherent with the CPU cache.
+	 *
+	 * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
+	 * require a CPU:WC mapping.
+	 */
+	if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) ||
+	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
+		caching = ttm_write_combined;
+
+	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
+	if (err) {
+		kfree(tt);
+		return NULL;
+	}
+
+	return &tt->ttm;
+}
+
+static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
+			      struct ttm_operation_ctx *ctx)
+{
+	int err;
+
+	/*
+	 * dma-bufs are not populated with pages, and the dma-
+	 * addresses are set up when moved to XE_PL_TT.
+	 */
+	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+		return 0;
+
+	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
+	if (err)
+		return err;
+
+	/* A follow up may move this xe_bo_move when BO is moved to XE_PL_TT */
+	err = xe_tt_map_sg(tt);
+	if (err)
+		ttm_pool_free(&ttm_dev->pool, tt);
+
+	return err;
+}
+
+static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
+{
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+		return;
+
+	if (xe_tt->sg) {
+		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
+				  DMA_BIDIRECTIONAL, 0);
+		sg_free_table(xe_tt->sg);
+		xe_tt->sg = NULL;
+	}
+
+	return ttm_pool_free(&ttm_dev->pool, tt);
+}
+
+static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
+{
+	ttm_tt_fini(tt);
+	kfree(tt);
+}
+
+static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
+				 struct ttm_resource *mem)
+{
+	struct xe_device *xe = ttm_to_xe_device(bdev);
+
+	switch (mem->mem_type) {
+	case XE_PL_SYSTEM:
+	case XE_PL_TT:
+		return 0;
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1: {
+		struct xe_ttm_vram_mgr_resource *vres =
+			to_xe_ttm_vram_mgr_resource(mem);
+		struct xe_mem_region *vram = res_to_mem_region(mem);
+
+		if (vres->used_visible_size < mem->size)
+			return -EINVAL;
+
+		mem->bus.offset = mem->start << PAGE_SHIFT;
+
+		if (vram->mapping &&
+		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
+			mem->bus.addr = (u8 *)vram->mapping +
+				mem->bus.offset;
+
+		mem->bus.offset += vram->io_start;
+		mem->bus.is_iomem = true;
+
+#if  !defined(CONFIG_X86)
+		mem->bus.caching = ttm_write_combined;
+#endif
+		return 0;
+	} case XE_PL_STOLEN:
+		return xe_ttm_stolen_io_mem_reserve(xe, mem);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
+				const struct ttm_operation_ctx *ctx)
+{
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	struct drm_gem_object *obj = &bo->ttm.base;
+	struct drm_gpuvm_bo *vm_bo;
+	bool idle = false;
+	int ret = 0;
+
+	dma_resv_assert_held(bo->ttm.base.resv);
+
+	if (!list_empty(&bo->ttm.base.gpuva.list)) {
+		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
+				    DMA_RESV_USAGE_BOOKKEEP);
+		dma_resv_for_each_fence_unlocked(&cursor, fence)
+			dma_fence_enable_sw_signaling(fence);
+		dma_resv_iter_end(&cursor);
+	}
+
+	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
+		struct drm_gpuva *gpuva;
+
+		if (!xe_vm_in_fault_mode(vm)) {
+			drm_gpuvm_bo_evict(vm_bo, true);
+			continue;
+		}
+
+		if (!idle) {
+			long timeout;
+
+			if (ctx->no_wait_gpu &&
+			    !dma_resv_test_signaled(bo->ttm.base.resv,
+						    DMA_RESV_USAGE_BOOKKEEP))
+				return -EBUSY;
+
+			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+							DMA_RESV_USAGE_BOOKKEEP,
+							ctx->interruptible,
+							MAX_SCHEDULE_TIMEOUT);
+			if (!timeout)
+				return -ETIME;
+			if (timeout < 0)
+				return timeout;
+
+			idle = true;
+		}
+
+		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
+			struct xe_vma *vma = gpuva_to_vma(gpuva);
+
+			trace_xe_vma_evict(vma);
+			ret = xe_vm_invalidate_vma(vma);
+			if (XE_WARN_ON(ret))
+				return ret;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
+ * Note that unmapping the attachment is deferred to the next
+ * map_attachment time, or to bo destroy (after idling) whichever comes first.
+ * This is to avoid syncing before unmap_attachment(), assuming that the
+ * caller relies on idling the reservation object before moving the
+ * backing store out. Should that assumption not hold, then we will be able
+ * to unconditionally call unmap_attachment() when moving out to system.
+ */
+static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
+			     struct ttm_resource *new_res)
+{
+	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
+	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
+					       ttm);
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	struct sg_table *sg;
+
+	xe_assert(xe, attach);
+	xe_assert(xe, ttm_bo->ttm);
+
+	if (new_res->mem_type == XE_PL_SYSTEM)
+		goto out;
+
+	if (ttm_bo->sg) {
+		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
+		ttm_bo->sg = NULL;
+	}
+
+	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+	if (IS_ERR(sg))
+		return PTR_ERR(sg);
+
+	ttm_bo->sg = sg;
+	xe_tt->sg = sg;
+
+out:
+	ttm_bo_move_null(ttm_bo, new_res);
+
+	return 0;
+}
+
+/**
+ * xe_bo_move_notify - Notify subsystems of a pending move
+ * @bo: The buffer object
+ * @ctx: The struct ttm_operation_ctx controlling locking and waits.
+ *
+ * This function notifies subsystems of an upcoming buffer move.
+ * Upon receiving such a notification, subsystems should schedule
+ * halting access to the underlying pages and optionally add a fence
+ * to the buffer object's dma_resv object, that signals when access is
+ * stopped. The caller will wait on all dma_resv fences before
+ * starting the move.
+ *
+ * A subsystem may commence access to the object after obtaining
+ * bindings to the new backing memory under the object lock.
+ *
+ * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
+ * negative error code on error.
+ */
+static int xe_bo_move_notify(struct xe_bo *bo,
+			     const struct ttm_operation_ctx *ctx)
+{
+	struct ttm_buffer_object *ttm_bo = &bo->ttm;
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	int ret;
+
+	/*
+	 * If this starts to call into many components, consider
+	 * using a notification chain here.
+	 */
+
+	if (xe_bo_is_pinned(bo))
+		return -EINVAL;
+
+	xe_bo_vunmap(bo);
+	ret = xe_bo_trigger_rebind(xe, bo, ctx);
+	if (ret)
+		return ret;
+
+	/* Don't call move_notify() for imported dma-bufs. */
+	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
+		dma_buf_move_notify(ttm_bo->base.dma_buf);
+
+	return 0;
+}
+
+static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
+		      struct ttm_operation_ctx *ctx,
+		      struct ttm_resource *new_mem,
+		      struct ttm_place *hop)
+{
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct ttm_resource *old_mem = ttm_bo->resource;
+	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
+	struct ttm_tt *ttm = ttm_bo->ttm;
+	struct xe_migrate *migrate = NULL;
+	struct dma_fence *fence;
+	bool move_lacks_source;
+	bool tt_has_data;
+	bool needs_clear;
+	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
+				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
+	int ret = 0;
+	/* Bo creation path, moving to system or TT. */
+	if ((!old_mem && ttm) && !handle_system_ccs) {
+		ttm_bo_move_null(ttm_bo, new_mem);
+		return 0;
+	}
+
+	if (ttm_bo->type == ttm_bo_type_sg) {
+		ret = xe_bo_move_notify(bo, ctx);
+		if (!ret)
+			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
+		goto out;
+	}
+
+	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
+			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
+
+	move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared)  :
+						(!mem_type_is_vram(old_mem_type) && !tt_has_data);
+
+	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
+		(!ttm && ttm_bo->type == ttm_bo_type_device);
+
+	if ((move_lacks_source && !needs_clear)) {
+		ttm_bo_move_null(ttm_bo, new_mem);
+		goto out;
+	}
+
+	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
+		ttm_bo_move_null(ttm_bo, new_mem);
+		goto out;
+	}
+
+	/*
+	 * Failed multi-hop where the old_mem is still marked as
+	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
+	 */
+	if (old_mem_type == XE_PL_TT &&
+	    new_mem->mem_type == XE_PL_TT) {
+		ttm_bo_move_null(ttm_bo, new_mem);
+		goto out;
+	}
+
+	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
+		ret = xe_bo_move_notify(bo, ctx);
+		if (ret)
+			goto out;
+	}
+
+	if (old_mem_type == XE_PL_TT &&
+	    new_mem->mem_type == XE_PL_SYSTEM) {
+		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
+						     DMA_RESV_USAGE_BOOKKEEP,
+						     true,
+						     MAX_SCHEDULE_TIMEOUT);
+		if (timeout < 0) {
+			ret = timeout;
+			goto out;
+		}
+
+		if (!handle_system_ccs) {
+			ttm_bo_move_null(ttm_bo, new_mem);
+			goto out;
+		}
+	}
+
+	if (!move_lacks_source &&
+	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
+	     (mem_type_is_vram(old_mem_type) &&
+	      new_mem->mem_type == XE_PL_SYSTEM))) {
+		hop->fpfn = 0;
+		hop->lpfn = 0;
+		hop->mem_type = XE_PL_TT;
+		hop->flags = TTM_PL_FLAG_TEMPORARY;
+		ret = -EMULTIHOP;
+		goto out;
+	}
+
+	if (bo->tile)
+		migrate = bo->tile->migrate;
+	else if (resource_is_vram(new_mem))
+		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
+	else if (mem_type_is_vram(old_mem_type))
+		migrate = mem_type_to_migrate(xe, old_mem_type);
+	else
+		migrate = xe->tiles[0].migrate;
+
+	xe_assert(xe, migrate);
+
+	trace_xe_bo_move(bo);
+	xe_device_mem_access_get(xe);
+
+	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
+		/*
+		 * Kernel memory that is pinned should only be moved on suspend
+		 * / resume, some of the pinned memory is required for the
+		 * device to resume / use the GPU to move other evicted memory
+		 * (user memory) around. This likely could be optimized a bit
+		 * futher where we find the minimum set of pinned memory
+		 * required for resume but for simplity doing a memcpy for all
+		 * pinned memory.
+		 */
+		ret = xe_bo_vmap(bo);
+		if (!ret) {
+			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
+
+			/* Create a new VMAP once kernel BO back in VRAM */
+			if (!ret && resource_is_vram(new_mem)) {
+				struct xe_mem_region *vram = res_to_mem_region(new_mem);
+				void *new_addr = vram->mapping +
+					(new_mem->start << PAGE_SHIFT);
+
+				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
+					ret = -EINVAL;
+					xe_device_mem_access_put(xe);
+					goto out;
+				}
+
+				xe_assert(xe, new_mem->start ==
+					  bo->placements->fpfn);
+
+				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
+			}
+		}
+	} else {
+		if (move_lacks_source)
+			fence = xe_migrate_clear(migrate, bo, new_mem);
+		else
+			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
+						new_mem, handle_system_ccs);
+		if (IS_ERR(fence)) {
+			ret = PTR_ERR(fence);
+			xe_device_mem_access_put(xe);
+			goto out;
+		}
+		if (!move_lacks_source) {
+			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
+							true, new_mem);
+			if (ret) {
+				dma_fence_wait(fence, false);
+				ttm_bo_move_null(ttm_bo, new_mem);
+				ret = 0;
+			}
+		} else {
+			/*
+			 * ttm_bo_move_accel_cleanup() may blow up if
+			 * bo->resource == NULL, so just attach the
+			 * fence and set the new resource.
+			 */
+			dma_resv_add_fence(ttm_bo->base.resv, fence,
+					   DMA_RESV_USAGE_KERNEL);
+			ttm_bo_move_null(ttm_bo, new_mem);
+		}
+
+		dma_fence_put(fence);
+	}
+
+	xe_device_mem_access_put(xe);
+
+out:
+	return ret;
+
+}
+
+/**
+ * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
+ * @bo: The buffer object to move.
+ *
+ * On successful completion, the object memory will be moved to sytem memory.
+ * This function blocks until the object has been fully moved.
+ *
+ * This is needed to for special handling of pinned VRAM object during
+ * suspend-resume.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict_pinned(struct xe_bo *bo)
+{
+	struct ttm_place place = {
+		.mem_type = XE_PL_TT,
+	};
+	struct ttm_placement placement = {
+		.placement = &place,
+		.num_placement = 1,
+	};
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+	};
+	struct ttm_resource *new_mem;
+	int ret;
+
+	xe_bo_assert_held(bo);
+
+	if (WARN_ON(!bo->ttm.resource))
+		return -EINVAL;
+
+	if (WARN_ON(!xe_bo_is_pinned(bo)))
+		return -EINVAL;
+
+	if (WARN_ON(!xe_bo_is_vram(bo)))
+		return -EINVAL;
+
+	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
+	if (ret)
+		return ret;
+
+	if (!bo->ttm.ttm) {
+		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
+		if (!bo->ttm.ttm) {
+			ret = -ENOMEM;
+			goto err_res_free;
+		}
+	}
+
+	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+	if (ret)
+		goto err_res_free;
+
+	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+	if (ret)
+		goto err_res_free;
+
+	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
+	if (ret)
+		goto err_res_free;
+
+	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+			      false, MAX_SCHEDULE_TIMEOUT);
+
+	return 0;
+
+err_res_free:
+	ttm_resource_free(&bo->ttm, &new_mem);
+	return ret;
+}
+
+/**
+ * xe_bo_restore_pinned() - Restore a pinned VRAM object
+ * @bo: The buffer object to move.
+ *
+ * On successful completion, the object memory will be moved back to VRAM.
+ * This function blocks until the object has been fully moved.
+ *
+ * This is needed to for special handling of pinned VRAM object during
+ * suspend-resume.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_restore_pinned(struct xe_bo *bo)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+	};
+	struct ttm_resource *new_mem;
+	int ret;
+
+	xe_bo_assert_held(bo);
+
+	if (WARN_ON(!bo->ttm.resource))
+		return -EINVAL;
+
+	if (WARN_ON(!xe_bo_is_pinned(bo)))
+		return -EINVAL;
+
+	if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
+		return -EINVAL;
+
+	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
+	if (ret)
+		return ret;
+
+	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
+	if (ret)
+		goto err_res_free;
+
+	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+	if (ret)
+		goto err_res_free;
+
+	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
+	if (ret)
+		goto err_res_free;
+
+	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+			      false, MAX_SCHEDULE_TIMEOUT);
+
+	return 0;
+
+err_res_free:
+	ttm_resource_free(&bo->ttm, &new_mem);
+	return ret;
+}
+
+static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
+				       unsigned long page_offset)
+{
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct xe_res_cursor cursor;
+	struct xe_mem_region *vram;
+
+	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
+		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
+
+	vram = res_to_mem_region(ttm_bo->resource);
+	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
+	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
+}
+
+static void __xe_bo_vunmap(struct xe_bo *bo);
+
+/*
+ * TODO: Move this function to TTM so we don't rely on how TTM does its
+ * locking, thereby abusing TTM internals.
+ */
+static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
+{
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	bool locked;
+
+	xe_assert(xe, !kref_read(&ttm_bo->kref));
+
+	/*
+	 * We can typically only race with TTM trylocking under the
+	 * lru_lock, which will immediately be unlocked again since
+	 * the ttm_bo refcount is zero at this point. So trylocking *should*
+	 * always succeed here, as long as we hold the lru lock.
+	 */
+	spin_lock(&ttm_bo->bdev->lru_lock);
+	locked = dma_resv_trylock(ttm_bo->base.resv);
+	spin_unlock(&ttm_bo->bdev->lru_lock);
+	xe_assert(xe, locked);
+
+	return locked;
+}
+
+static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
+{
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	struct dma_fence *replacement = NULL;
+	struct xe_bo *bo;
+
+	if (!xe_bo_is_xe_bo(ttm_bo))
+		return;
+
+	bo = ttm_to_xe_bo(ttm_bo);
+	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
+
+	/*
+	 * Corner case where TTM fails to allocate memory and this BOs resv
+	 * still points the VMs resv
+	 */
+	if (ttm_bo->base.resv != &ttm_bo->base._resv)
+		return;
+
+	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
+		return;
+
+	/*
+	 * Scrub the preempt fences if any. The unbind fence is already
+	 * attached to the resv.
+	 * TODO: Don't do this for external bos once we scrub them after
+	 * unbind.
+	 */
+	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
+				DMA_RESV_USAGE_BOOKKEEP, fence) {
+		if (xe_fence_is_xe_preempt(fence) &&
+		    !dma_fence_is_signaled(fence)) {
+			if (!replacement)
+				replacement = dma_fence_get_stub();
+
+			dma_resv_replace_fences(ttm_bo->base.resv,
+						fence->context,
+						replacement,
+						DMA_RESV_USAGE_BOOKKEEP);
+		}
+	}
+	dma_fence_put(replacement);
+
+	dma_resv_unlock(ttm_bo->base.resv);
+}
+
+static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
+{
+	if (!xe_bo_is_xe_bo(ttm_bo))
+		return;
+
+	/*
+	 * Object is idle and about to be destroyed. Release the
+	 * dma-buf attachment.
+	 */
+	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
+		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
+						       struct xe_ttm_tt, ttm);
+
+		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
+					 DMA_BIDIRECTIONAL);
+		ttm_bo->sg = NULL;
+		xe_tt->sg = NULL;
+	}
+}
+
+struct ttm_device_funcs xe_ttm_funcs = {
+	.ttm_tt_create = xe_ttm_tt_create,
+	.ttm_tt_populate = xe_ttm_tt_populate,
+	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
+	.ttm_tt_destroy = xe_ttm_tt_destroy,
+	.evict_flags = xe_evict_flags,
+	.move = xe_bo_move,
+	.io_mem_reserve = xe_ttm_io_mem_reserve,
+	.io_mem_pfn = xe_ttm_io_mem_pfn,
+	.release_notify = xe_ttm_bo_release_notify,
+	.eviction_valuable = ttm_bo_eviction_valuable,
+	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
+};
+
+static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
+{
+	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+
+	if (bo->ttm.base.import_attach)
+		drm_prime_gem_destroy(&bo->ttm.base, NULL);
+	drm_gem_object_release(&bo->ttm.base);
+
+	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
+
+	if (bo->ggtt_node.size)
+		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
+
+#ifdef CONFIG_PROC_FS
+	if (bo->client)
+		xe_drm_client_remove_bo(bo);
+#endif
+
+	if (bo->vm && xe_bo_is_user(bo))
+		xe_vm_put(bo->vm);
+
+	kfree(bo);
+}
+
+static void xe_gem_object_free(struct drm_gem_object *obj)
+{
+	/* Our BO reference counting scheme works as follows:
+	 *
+	 * The gem object kref is typically used throughout the driver,
+	 * and the gem object holds a ttm_buffer_object refcount, so
+	 * that when the last gem object reference is put, which is when
+	 * we end up in this function, we put also that ttm_buffer_object
+	 * refcount. Anything using gem interfaces is then no longer
+	 * allowed to access the object in a way that requires a gem
+	 * refcount, including locking the object.
+	 *
+	 * driver ttm callbacks is allowed to use the ttm_buffer_object
+	 * refcount directly if needed.
+	 */
+	__xe_bo_vunmap(gem_to_xe_bo(obj));
+	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
+}
+
+static void xe_gem_object_close(struct drm_gem_object *obj,
+				struct drm_file *file_priv)
+{
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+
+	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
+		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
+
+		xe_bo_lock(bo, false);
+		ttm_bo_set_bulk_move(&bo->ttm, NULL);
+		xe_bo_unlock(bo);
+	}
+}
+
+static bool should_migrate_to_system(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic;
+}
+
+static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
+{
+	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
+	struct drm_device *ddev = tbo->base.dev;
+	vm_fault_t ret;
+	int idx, r = 0;
+
+	ret = ttm_bo_vm_reserve(tbo, vmf);
+	if (ret)
+		return ret;
+
+	if (drm_dev_enter(ddev, &idx)) {
+		struct xe_bo *bo = ttm_to_xe_bo(tbo);
+
+		trace_xe_bo_cpu_fault(bo);
+
+		if (should_migrate_to_system(bo)) {
+			r = xe_bo_migrate(bo, XE_PL_TT);
+			if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
+				ret = VM_FAULT_NOPAGE;
+			else if (r)
+				ret = VM_FAULT_SIGBUS;
+		}
+		if (!ret)
+			ret = ttm_bo_vm_fault_reserved(vmf,
+						       vmf->vma->vm_page_prot,
+						       TTM_BO_VM_NUM_PREFAULT);
+		drm_dev_exit(idx);
+	} else {
+		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+	}
+	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+		return ret;
+
+	dma_resv_unlock(tbo->base.resv);
+	return ret;
+}
+
+static const struct vm_operations_struct xe_gem_vm_ops = {
+	.fault = xe_gem_fault,
+	.open = ttm_bo_vm_open,
+	.close = ttm_bo_vm_close,
+	.access = ttm_bo_vm_access
+};
+
+static const struct drm_gem_object_funcs xe_gem_object_funcs = {
+	.free = xe_gem_object_free,
+	.close = xe_gem_object_close,
+	.mmap = drm_gem_ttm_mmap,
+	.export = xe_gem_prime_export,
+	.vm_ops = &xe_gem_vm_ops,
+};
+
+/**
+ * xe_bo_alloc - Allocate storage for a struct xe_bo
+ *
+ * This funcition is intended to allocate storage to be used for input
+ * to __xe_bo_create_locked(), in the case a pointer to the bo to be
+ * created is needed before the call to __xe_bo_create_locked().
+ * If __xe_bo_create_locked ends up never to be called, then the
+ * storage allocated with this function needs to be freed using
+ * xe_bo_free().
+ *
+ * Return: A pointer to an uninitialized struct xe_bo on success,
+ * ERR_PTR(-ENOMEM) on error.
+ */
+struct xe_bo *xe_bo_alloc(void)
+{
+	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+
+	return bo;
+}
+
+/**
+ * xe_bo_free - Free storage allocated using xe_bo_alloc()
+ * @bo: The buffer object storage.
+ *
+ * Refer to xe_bo_alloc() documentation for valid use-cases.
+ */
+void xe_bo_free(struct xe_bo *bo)
+{
+	kfree(bo);
+}
+
+struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+				     struct xe_tile *tile, struct dma_resv *resv,
+				     struct ttm_lru_bulk_move *bulk, size_t size,
+				     u16 cpu_caching, enum ttm_bo_type type,
+				     u32 flags)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+	};
+	struct ttm_placement *placement;
+	uint32_t alignment;
+	size_t aligned_size;
+	int err;
+
+	/* Only kernel objects should set GT */
+	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
+
+	if (XE_WARN_ON(!size)) {
+		xe_bo_free(bo);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) &&
+	    !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
+	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
+		aligned_size = ALIGN(size, SZ_64K);
+		if (type != ttm_bo_type_device)
+			size = ALIGN(size, SZ_64K);
+		flags |= XE_BO_INTERNAL_64K;
+		alignment = SZ_64K >> PAGE_SHIFT;
+
+	} else {
+		aligned_size = ALIGN(size, SZ_4K);
+		flags &= ~XE_BO_INTERNAL_64K;
+		alignment = SZ_4K >> PAGE_SHIFT;
+	}
+
+	if (type == ttm_bo_type_device && aligned_size != size)
+		return ERR_PTR(-EINVAL);
+
+	if (!bo) {
+		bo = xe_bo_alloc();
+		if (IS_ERR(bo))
+			return bo;
+	}
+
+	bo->ccs_cleared = false;
+	bo->tile = tile;
+	bo->size = size;
+	bo->flags = flags;
+	bo->cpu_caching = cpu_caching;
+	bo->ttm.base.funcs = &xe_gem_object_funcs;
+	bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
+	bo->props.preferred_gt = XE_BO_PROPS_INVALID;
+	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
+	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
+	INIT_LIST_HEAD(&bo->pinned_link);
+#ifdef CONFIG_PROC_FS
+	INIT_LIST_HEAD(&bo->client_link);
+#endif
+
+	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
+
+	if (resv) {
+		ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT);
+		ctx.resv = resv;
+	}
+
+	if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) {
+		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
+		if (WARN_ON(err)) {
+			xe_ttm_bo_destroy(&bo->ttm);
+			return ERR_PTR(err);
+		}
+	}
+
+	/* Defer populating type_sg bos */
+	placement = (type == ttm_bo_type_sg ||
+		     bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
+		&bo->placement;
+	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
+				   placement, alignment,
+				   &ctx, NULL, resv, xe_ttm_bo_destroy);
+	if (err)
+		return ERR_PTR(err);
+
+	/*
+	 * The VRAM pages underneath are potentially still being accessed by the
+	 * GPU, as per async GPU clearing and async evictions. However TTM makes
+	 * sure to add any corresponding move/clear fences into the objects
+	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
+	 *
+	 * For KMD internal buffers we don't care about GPU clearing, however we
+	 * still need to handle async evictions, where the VRAM is still being
+	 * accessed by the GPU. Most internal callers are not expecting this,
+	 * since they are missing the required synchronisation before accessing
+	 * the memory. To keep things simple just sync wait any kernel fences
+	 * here, if the buffer is designated KMD internal.
+	 *
+	 * For normal userspace objects we should already have the required
+	 * pipelining or sync waiting elsewhere, since we already have to deal
+	 * with things like async GPU clearing.
+	 */
+	if (type == ttm_bo_type_kernel) {
+		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+						     DMA_RESV_USAGE_KERNEL,
+						     ctx.interruptible,
+						     MAX_SCHEDULE_TIMEOUT);
+
+		if (timeout < 0) {
+			if (!resv)
+				dma_resv_unlock(bo->ttm.base.resv);
+			xe_bo_put(bo);
+			return ERR_PTR(timeout);
+		}
+	}
+
+	bo->created = true;
+	if (bulk)
+		ttm_bo_set_bulk_move(&bo->ttm, bulk);
+	else
+		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+	return bo;
+}
+
+static int __xe_bo_fixed_placement(struct xe_device *xe,
+				   struct xe_bo *bo,
+				   u32 flags,
+				   u64 start, u64 end, u64 size)
+{
+	struct ttm_place *place = bo->placements;
+
+	if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT))
+		return -EINVAL;
+
+	place->flags = TTM_PL_FLAG_CONTIGUOUS;
+	place->fpfn = start >> PAGE_SHIFT;
+	place->lpfn = end >> PAGE_SHIFT;
+
+	switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) {
+	case XE_BO_CREATE_VRAM0_BIT:
+		place->mem_type = XE_PL_VRAM0;
+		break;
+	case XE_BO_CREATE_VRAM1_BIT:
+		place->mem_type = XE_PL_VRAM1;
+		break;
+	case XE_BO_CREATE_STOLEN_BIT:
+		place->mem_type = XE_PL_STOLEN;
+		break;
+
+	default:
+		/* 0 or multiple of the above set */
+		return -EINVAL;
+	}
+
+	bo->placement = (struct ttm_placement) {
+		.num_placement = 1,
+		.placement = place,
+		.num_busy_placement = 1,
+		.busy_placement = place,
+	};
+
+	return 0;
+}
+
+static struct xe_bo *
+__xe_bo_create_locked(struct xe_device *xe,
+		      struct xe_tile *tile, struct xe_vm *vm,
+		      size_t size, u64 start, u64 end,
+		      u16 cpu_caching, enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo = NULL;
+	int err;
+
+	if (vm)
+		xe_vm_assert_held(vm);
+
+	if (start || end != ~0ULL) {
+		bo = xe_bo_alloc();
+		if (IS_ERR(bo))
+			return bo;
+
+		flags |= XE_BO_FIXED_PLACEMENT_BIT;
+		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
+		if (err) {
+			xe_bo_free(bo);
+			return ERR_PTR(err);
+		}
+	}
+
+	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
+				    vm && !xe_vm_in_fault_mode(vm) &&
+				    flags & XE_BO_CREATE_USER_BIT ?
+				    &vm->lru_bulk_move : NULL, size,
+				    cpu_caching, type, flags);
+	if (IS_ERR(bo))
+		return bo;
+
+	/*
+	 * Note that instead of taking a reference no the drm_gpuvm_resv_bo(),
+	 * to ensure the shared resv doesn't disappear under the bo, the bo
+	 * will keep a reference to the vm, and avoid circular references
+	 * by having all the vm's bo refereferences released at vm close
+	 * time.
+	 */
+	if (vm && xe_bo_is_user(bo))
+		xe_vm_get(vm);
+	bo->vm = vm;
+
+	if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
+		if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
+			tile = xe_device_get_root_tile(xe);
+
+		xe_assert(xe, tile);
+
+		if (flags & XE_BO_FIXED_PLACEMENT_BIT) {
+			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
+						   start + bo->size, U64_MAX);
+		} else {
+			err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
+		}
+		if (err)
+			goto err_unlock_put_bo;
+	}
+
+	return bo;
+
+err_unlock_put_bo:
+	__xe_bo_unset_bulk_move(bo);
+	xe_bo_unlock_vm_held(bo);
+	xe_bo_put(bo);
+	return ERR_PTR(err);
+}
+
+struct xe_bo *
+xe_bo_create_locked_range(struct xe_device *xe,
+			  struct xe_tile *tile, struct xe_vm *vm,
+			  size_t size, u64 start, u64 end,
+			  enum ttm_bo_type type, u32 flags)
+{
+	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
+}
+
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
+				  struct xe_vm *vm, size_t size,
+				  enum ttm_bo_type type, u32 flags)
+{
+	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
+}
+
+struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm, size_t size,
+				u16 cpu_caching,
+				enum ttm_bo_type type,
+				u32 flags)
+{
+	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
+						 cpu_caching, type,
+						 flags | XE_BO_CREATE_USER_BIT);
+	if (!IS_ERR(bo))
+		xe_bo_unlock_vm_held(bo);
+
+	return bo;
+}
+
+struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
+			   struct xe_vm *vm, size_t size,
+			   enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
+
+	if (!IS_ERR(bo))
+		xe_bo_unlock_vm_held(bo);
+
+	return bo;
+}
+
+struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
+				      struct xe_vm *vm,
+				      size_t size, u64 offset,
+				      enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo;
+	int err;
+	u64 start = offset == ~0ull ? 0 : offset;
+	u64 end = offset == ~0ull ? offset : start + size;
+
+	if (flags & XE_BO_CREATE_STOLEN_BIT &&
+	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
+		flags |= XE_BO_CREATE_GGTT_BIT;
+
+	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
+				       flags | XE_BO_NEEDS_CPU_ACCESS);
+	if (IS_ERR(bo))
+		return bo;
+
+	err = xe_bo_pin(bo);
+	if (err)
+		goto err_put;
+
+	err = xe_bo_vmap(bo);
+	if (err)
+		goto err_unpin;
+
+	xe_bo_unlock_vm_held(bo);
+
+	return bo;
+
+err_unpin:
+	xe_bo_unpin(bo);
+err_put:
+	xe_bo_unlock_vm_held(bo);
+	xe_bo_put(bo);
+	return ERR_PTR(err);
+}
+
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+				   struct xe_vm *vm, size_t size,
+				   enum ttm_bo_type type, u32 flags)
+{
+	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
+}
+
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+				     const void *data, size_t size,
+				     enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
+						ALIGN(size, PAGE_SIZE),
+						type, flags);
+	if (IS_ERR(bo))
+		return bo;
+
+	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
+
+	return bo;
+}
+
+static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
+{
+	xe_bo_unpin_map_no_vm(arg);
+}
+
+struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+					   size_t size, u32 flags)
+{
+	struct xe_bo *bo;
+	int ret;
+
+	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
+	if (IS_ERR(bo))
+		return bo;
+
+	ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return bo;
+}
+
+struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+					     const void *data, size_t size, u32 flags)
+{
+	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
+
+	if (IS_ERR(bo))
+		return bo;
+
+	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
+
+	return bo;
+}
+
+/*
+ * XXX: This is in the VM bind data path, likely should calculate this once and
+ * store, with a recalculation if the BO is moved.
+ */
+uint64_t vram_region_gpu_offset(struct ttm_resource *res)
+{
+	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
+
+	if (res->mem_type == XE_PL_STOLEN)
+		return xe_ttm_stolen_gpu_offset(xe);
+
+	return res_to_mem_region(res)->dpa_base;
+}
+
+/**
+ * xe_bo_pin_external - pin an external BO
+ * @bo: buffer object to be pinned
+ *
+ * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_pin as this function has it own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_bo_pin_external(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	int err;
+
+	xe_assert(xe, !bo->vm);
+	xe_assert(xe, xe_bo_is_user(bo));
+
+	if (!xe_bo_is_pinned(bo)) {
+		err = xe_bo_validate(bo, NULL, false);
+		if (err)
+			return err;
+
+		if (xe_bo_is_vram(bo)) {
+			spin_lock(&xe->pinned.lock);
+			list_add_tail(&bo->pinned_link,
+				      &xe->pinned.external_vram);
+			spin_unlock(&xe->pinned.lock);
+		}
+	}
+
+	ttm_bo_pin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+	return 0;
+}
+
+int xe_bo_pin(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	int err;
+
+	/* We currently don't expect user BO to be pinned */
+	xe_assert(xe, !xe_bo_is_user(bo));
+
+	/* Pinned object must be in GGTT or have pinned flag */
+	xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT |
+				   XE_BO_CREATE_GGTT_BIT));
+
+	/*
+	 * No reason we can't support pinning imported dma-bufs we just don't
+	 * expect to pin an imported dma-buf.
+	 */
+	xe_assert(xe, !bo->ttm.base.import_attach);
+
+	/* We only expect at most 1 pin */
+	xe_assert(xe, !xe_bo_is_pinned(bo));
+
+	err = xe_bo_validate(bo, NULL, false);
+	if (err)
+		return err;
+
+	/*
+	 * For pinned objects in on DGFX, which are also in vram, we expect
+	 * these to be in contiguous VRAM memory. Required eviction / restore
+	 * during suspend / resume (force restore to same physical address).
+	 */
+	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+	    bo->flags & XE_BO_INTERNAL_TEST)) {
+		struct ttm_place *place = &(bo->placements[0]);
+
+		if (mem_type_is_vram(place->mem_type)) {
+			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
+
+			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
+				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
+			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
+
+			spin_lock(&xe->pinned.lock);
+			list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+			spin_unlock(&xe->pinned.lock);
+		}
+	}
+
+	ttm_bo_pin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+	return 0;
+}
+
+/**
+ * xe_bo_unpin_external - unpin an external BO
+ * @bo: buffer object to be unpinned
+ *
+ * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_unpin as this function has it own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+void xe_bo_unpin_external(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	xe_assert(xe, !bo->vm);
+	xe_assert(xe, xe_bo_is_pinned(bo));
+	xe_assert(xe, xe_bo_is_user(bo));
+
+	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
+		spin_lock(&xe->pinned.lock);
+		list_del_init(&bo->pinned_link);
+		spin_unlock(&xe->pinned.lock);
+	}
+
+	ttm_bo_unpin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+}
+
+void xe_bo_unpin(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	xe_assert(xe, !bo->ttm.base.import_attach);
+	xe_assert(xe, xe_bo_is_pinned(bo));
+
+	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+	    bo->flags & XE_BO_INTERNAL_TEST)) {
+		struct ttm_place *place = &(bo->placements[0]);
+
+		if (mem_type_is_vram(place->mem_type)) {
+			xe_assert(xe, !list_empty(&bo->pinned_link));
+
+			spin_lock(&xe->pinned.lock);
+			list_del_init(&bo->pinned_link);
+			spin_unlock(&xe->pinned.lock);
+		}
+	}
+
+	ttm_bo_unpin(&bo->ttm);
+}
+
+/**
+ * xe_bo_validate() - Make sure the bo is in an allowed placement
+ * @bo: The bo,
+ * @vm: Pointer to a the vm the bo shares a locked dma_resv object with, or
+ *      NULL. Used together with @allow_res_evict.
+ * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
+ *                   reservation object.
+ *
+ * Make sure the bo is in allowed placement, migrating it if necessary. If
+ * needed, other bos will be evicted. If bos selected for eviction shares
+ * the @vm's reservation object, they can be evicted iff @allow_res_evict is
+ * set to true, otherwise they will be bypassed.
+ *
+ * Return: 0 on success, negative error code on failure. May return
+ * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
+ */
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+	};
+
+	if (vm) {
+		lockdep_assert_held(&vm->lock);
+		xe_vm_assert_held(vm);
+
+		ctx.allow_res_evict = allow_res_evict;
+		ctx.resv = xe_vm_resv(vm);
+	}
+
+	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
+}
+
+bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
+{
+	if (bo->destroy == &xe_ttm_bo_destroy)
+		return true;
+
+	return false;
+}
+
+/*
+ * Resolve a BO address. There is no assert to check if the proper lock is held
+ * so it should only be used in cases where it is not fatal to get the wrong
+ * address, such as printing debug information, but not in cases where memory is
+ * written based on this result.
+ */
+dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_res_cursor cur;
+	u64 page;
+
+	xe_assert(xe, page_size <= PAGE_SIZE);
+	page = offset >> PAGE_SHIFT;
+	offset &= (PAGE_SIZE - 1);
+
+	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
+		xe_assert(xe, bo->ttm.ttm);
+
+		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
+				page_size, &cur);
+		return xe_res_dma(&cur) + offset;
+	} else {
+		struct xe_res_cursor cur;
+
+		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
+			     page_size, &cur);
+		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
+	}
+}
+
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
+{
+	if (!READ_ONCE(bo->ttm.pin_count))
+		xe_bo_assert_held(bo);
+	return __xe_bo_addr(bo, offset, page_size);
+}
+
+int xe_bo_vmap(struct xe_bo *bo)
+{
+	void *virtual;
+	bool is_iomem;
+	int ret;
+
+	xe_bo_assert_held(bo);
+
+	if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
+		return -EINVAL;
+
+	if (!iosys_map_is_null(&bo->vmap))
+		return 0;
+
+	/*
+	 * We use this more or less deprecated interface for now since
+	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
+	 * single page bos, which is done here.
+	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
+	 * to use struct iosys_map.
+	 */
+	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
+	if (ret)
+		return ret;
+
+	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
+	if (is_iomem)
+		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
+	else
+		iosys_map_set_vaddr(&bo->vmap, virtual);
+
+	return 0;
+}
+
+static void __xe_bo_vunmap(struct xe_bo *bo)
+{
+	if (!iosys_map_is_null(&bo->vmap)) {
+		iosys_map_clear(&bo->vmap);
+		ttm_bo_kunmap(&bo->kmap);
+	}
+}
+
+void xe_bo_vunmap(struct xe_bo *bo)
+{
+	xe_bo_assert_held(bo);
+	__xe_bo_vunmap(bo);
+}
+
+int xe_gem_create_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_gem_create *args = data;
+	struct xe_vm *vm = NULL;
+	struct xe_bo *bo;
+	unsigned int bo_flags;
+	u32 handle;
+	int err;
+
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	/* at least one valid memory placement must be specified */
+	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
+			 !args->placement))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags &
+			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
+			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
+			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->handle))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, !args->size))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
+		return -EINVAL;
+
+	bo_flags = 0;
+	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
+		bo_flags |= XE_BO_DEFER_BACKING;
+
+	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
+		bo_flags |= XE_BO_SCANOUT_BIT;
+
+	bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
+
+	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
+		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
+			return -EINVAL;
+
+		bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
+	}
+
+	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
+			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK &&
+			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT &&
+			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
+		return -EINVAL;
+
+	if (args->vm_id) {
+		vm = xe_vm_lookup(xef, args->vm_id);
+		if (XE_IOCTL_DBG(xe, !vm))
+			return -ENOENT;
+		err = xe_vm_lock(vm, true);
+		if (err)
+			goto out_vm;
+	}
+
+	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
+			       ttm_bo_type_device, bo_flags);
+
+	if (vm)
+		xe_vm_unlock(vm);
+
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		goto out_vm;
+	}
+
+	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
+	if (err)
+		goto out_bulk;
+
+	args->handle = handle;
+	goto out_put;
+
+out_bulk:
+	if (vm && !xe_vm_in_fault_mode(vm)) {
+		xe_vm_lock(vm, false);
+		__xe_bo_unset_bulk_move(bo);
+		xe_vm_unlock(vm);
+	}
+out_put:
+	xe_bo_put(bo);
+out_vm:
+	if (vm)
+		xe_vm_put(vm);
+
+	return err;
+}
+
+int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct drm_xe_gem_mmap_offset *args = data;
+	struct drm_gem_object *gem_obj;
+
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags))
+		return -EINVAL;
+
+	gem_obj = drm_gem_object_lookup(file, args->handle);
+	if (XE_IOCTL_DBG(xe, !gem_obj))
+		return -ENOENT;
+
+	/* The mmap offset was set up at BO allocation time. */
+	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+
+	xe_bo_put(gem_to_xe_bo(gem_obj));
+	return 0;
+}
+
+/**
+ * xe_bo_lock() - Lock the buffer object's dma_resv object
+ * @bo: The struct xe_bo whose lock is to be taken
+ * @intr: Whether to perform any wait interruptible
+ *
+ * Locks the buffer object's dma_resv object. If the buffer object is
+ * pointing to a shared dma_resv object, that shared lock is locked.
+ *
+ * Return: 0 on success, -EINTR if @intr is true and the wait for a
+ * contended lock was interrupted. If @intr is set to false, the
+ * function always returns 0.
+ */
+int xe_bo_lock(struct xe_bo *bo, bool intr)
+{
+	if (intr)
+		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
+
+	dma_resv_lock(bo->ttm.base.resv, NULL);
+
+	return 0;
+}
+
+/**
+ * xe_bo_unlock() - Unlock the buffer object's dma_resv object
+ * @bo: The struct xe_bo whose lock is to be released.
+ *
+ * Unlock a buffer object lock that was locked by xe_bo_lock().
+ */
+void xe_bo_unlock(struct xe_bo *bo)
+{
+	dma_resv_unlock(bo->ttm.base.resv);
+}
+
+/**
+ * xe_bo_can_migrate - Whether a buffer object likely can be migrated
+ * @bo: The buffer object to migrate
+ * @mem_type: The TTM memory type intended to migrate to
+ *
+ * Check whether the buffer object supports migration to the
+ * given memory type. Note that pinning may affect the ability to migrate as
+ * returned by this function.
+ *
+ * This function is primarily intended as a helper for checking the
+ * possibility to migrate buffer objects and can be called without
+ * the object lock held.
+ *
+ * Return: true if migration is possible, false otherwise.
+ */
+bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
+{
+	unsigned int cur_place;
+
+	if (bo->ttm.type == ttm_bo_type_kernel)
+		return true;
+
+	if (bo->ttm.type == ttm_bo_type_sg)
+		return false;
+
+	for (cur_place = 0; cur_place < bo->placement.num_placement;
+	     cur_place++) {
+		if (bo->placements[cur_place].mem_type == mem_type)
+			return true;
+	}
+
+	return false;
+}
+
+static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
+{
+	memset(place, 0, sizeof(*place));
+	place->mem_type = mem_type;
+}
+
+/**
+ * xe_bo_migrate - Migrate an object to the desired region id
+ * @bo: The buffer object to migrate.
+ * @mem_type: The TTM region type to migrate to.
+ *
+ * Attempt to migrate the buffer object to the desired memory region. The
+ * buffer object may not be pinned, and must be locked.
+ * On successful completion, the object memory type will be updated,
+ * but an async migration task may not have completed yet, and to
+ * accomplish that, the object's kernel fences must be signaled with
+ * the object lock held.
+ *
+ * Return: 0 on success. Negative error code on failure. In particular may
+ * return -EINTR or -ERESTARTSYS if signal pending.
+ */
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
+{
+	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+	};
+	struct ttm_placement placement;
+	struct ttm_place requested;
+
+	xe_bo_assert_held(bo);
+
+	if (bo->ttm.resource->mem_type == mem_type)
+		return 0;
+
+	if (xe_bo_is_pinned(bo))
+		return -EBUSY;
+
+	if (!xe_bo_can_migrate(bo, mem_type))
+		return -EINVAL;
+
+	xe_place_from_ttm_type(mem_type, &requested);
+	placement.num_placement = 1;
+	placement.num_busy_placement = 1;
+	placement.placement = &requested;
+	placement.busy_placement = &requested;
+
+	/*
+	 * Stolen needs to be handled like below VRAM handling if we ever need
+	 * to support it.
+	 */
+	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
+
+	if (mem_type_is_vram(mem_type)) {
+		u32 c = 0;
+
+		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
+	}
+
+	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
+}
+
+/**
+ * xe_bo_evict - Evict an object to evict placement
+ * @bo: The buffer object to migrate.
+ * @force_alloc: Set force_alloc in ttm_operation_ctx
+ *
+ * On successful completion, the object memory will be moved to evict
+ * placement. Ths function blocks until the object has been fully moved.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false,
+		.force_alloc = force_alloc,
+	};
+	struct ttm_placement placement;
+	int ret;
+
+	xe_evict_flags(&bo->ttm, &placement);
+	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
+	if (ret)
+		return ret;
+
+	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+			      false, MAX_SCHEDULE_TIMEOUT);
+
+	return 0;
+}
+
+/**
+ * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
+ * placed in system memory.
+ * @bo: The xe_bo
+ *
+ * Return: true if extra pages need to be allocated, false otherwise.
+ */
+bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
+		return false;
+
+	/* On discrete GPUs, if the GPU can access this buffer from
+	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
+	 * can't be used since there's no CCS storage associated with
+	 * non-VRAM addresses.
+	 */
+	if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT))
+		return false;
+
+	return true;
+}
+
+/**
+ * __xe_bo_release_dummy() - Dummy kref release function
+ * @kref: The embedded struct kref.
+ *
+ * Dummy release function for xe_bo_put_deferred(). Keep off.
+ */
+void __xe_bo_release_dummy(struct kref *kref)
+{
+}
+
+/**
+ * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
+ * @deferred: The lockless list used for the call to xe_bo_put_deferred().
+ *
+ * Puts all bos whose put was deferred by xe_bo_put_deferred().
+ * The @deferred list can be either an onstack local list or a global
+ * shared list used by a workqueue.
+ */
+void xe_bo_put_commit(struct llist_head *deferred)
+{
+	struct llist_node *freed;
+	struct xe_bo *bo, *next;
+
+	if (!deferred)
+		return;
+
+	freed = llist_del_all(deferred);
+	if (!freed)
+		return;
+
+	llist_for_each_entry_safe(bo, next, freed, freed)
+		drm_gem_object_free(&bo->ttm.base.refcount);
+}
+
+/**
+ * xe_bo_dumb_create - Create a dumb bo as backing for a fb
+ * @file_priv: ...
+ * @dev: ...
+ * @args: ...
+ *
+ * See dumb_create() hook in include/drm/drm_drv.h
+ *
+ * Return: ...
+ */
+int xe_bo_dumb_create(struct drm_file *file_priv,
+		      struct drm_device *dev,
+		      struct drm_mode_create_dumb *args)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_bo *bo;
+	uint32_t handle;
+	int cpp = DIV_ROUND_UP(args->bpp, 8);
+	int err;
+	u32 page_size = max_t(u32, PAGE_SIZE,
+		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
+
+	args->pitch = ALIGN(args->width * cpp, 64);
+	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
+			   page_size);
+
+	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
+			       DRM_XE_GEM_CPU_CACHING_WC,
+			       ttm_bo_type_device,
+			       XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+			       XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
+			       XE_BO_NEEDS_CPU_ACCESS);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_put(&bo->ttm.base);
+	if (!err)
+		args->handle = handle;
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_bo.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
new file mode 100644
index 000000000000..9b1279aca127
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -0,0 +1,355 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_BO_H_
+#define _XE_BO_H_
+
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo_types.h"
+#include "xe_macros.h"
+#include "xe_vm_types.h"
+#include "xe_vm.h"
+
+/**
+ * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held.
+ * @vm: The vm
+ */
+#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
+
+
+
+#define XE_DEFAULT_GTT_SIZE_MB          3072ULL /* 3GB by default */
+
+#define XE_BO_CREATE_USER_BIT		BIT(0)
+/* The bits below need to be contiguous, or things break */
+#define XE_BO_CREATE_SYSTEM_BIT		BIT(1)
+#define XE_BO_CREATE_VRAM0_BIT		BIT(2)
+#define XE_BO_CREATE_VRAM1_BIT		BIT(3)
+#define XE_BO_CREATE_VRAM_MASK		(XE_BO_CREATE_VRAM0_BIT | \
+					 XE_BO_CREATE_VRAM1_BIT)
+/* -- */
+#define XE_BO_CREATE_STOLEN_BIT		BIT(4)
+#define XE_BO_CREATE_VRAM_IF_DGFX(tile) \
+	(IS_DGFX(tile_to_xe(tile)) ? XE_BO_CREATE_VRAM0_BIT << (tile)->id : \
+	 XE_BO_CREATE_SYSTEM_BIT)
+#define XE_BO_CREATE_GGTT_BIT		BIT(5)
+#define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6)
+#define XE_BO_CREATE_PINNED_BIT		BIT(7)
+#define XE_BO_CREATE_NO_RESV_EVICT	BIT(8)
+#define XE_BO_DEFER_BACKING		BIT(9)
+#define XE_BO_SCANOUT_BIT		BIT(10)
+#define XE_BO_FIXED_PLACEMENT_BIT	BIT(11)
+#define XE_BO_PAGETABLE			BIT(12)
+#define XE_BO_NEEDS_CPU_ACCESS		BIT(13)
+/* this one is trigger internally only */
+#define XE_BO_INTERNAL_TEST		BIT(30)
+#define XE_BO_INTERNAL_64K		BIT(31)
+
+#define XELPG_PPGTT_PTE_PAT3		BIT_ULL(62)
+#define XE2_PPGTT_PTE_PAT4		BIT_ULL(61)
+#define XE_PPGTT_PDE_PDPE_PAT2		BIT_ULL(12)
+#define XE_PPGTT_PTE_PAT2		BIT_ULL(7)
+#define XE_PPGTT_PTE_PAT1		BIT_ULL(4)
+#define XE_PPGTT_PTE_PAT0		BIT_ULL(3)
+
+#define XE_PTE_SHIFT			12
+#define XE_PAGE_SIZE			(1 << XE_PTE_SHIFT)
+#define XE_PTE_MASK			(XE_PAGE_SIZE - 1)
+#define XE_PDE_SHIFT			(XE_PTE_SHIFT - 3)
+#define XE_PDES				(1 << XE_PDE_SHIFT)
+#define XE_PDE_MASK			(XE_PDES - 1)
+
+#define XE_64K_PTE_SHIFT		16
+#define XE_64K_PAGE_SIZE		(1 << XE_64K_PTE_SHIFT)
+#define XE_64K_PTE_MASK			(XE_64K_PAGE_SIZE - 1)
+#define XE_64K_PDE_MASK			(XE_PDE_MASK >> 4)
+
+#define XE_PDE_PS_2M			BIT_ULL(7)
+#define XE_PDPE_PS_1G			BIT_ULL(7)
+#define XE_PDE_IPS_64K			BIT_ULL(11)
+
+#define XE_GGTT_PTE_DM			BIT_ULL(1)
+#define XE_USM_PPGTT_PTE_AE		BIT_ULL(10)
+#define XE_PPGTT_PTE_DM			BIT_ULL(11)
+#define XE_PDE_64K			BIT_ULL(6)
+#define XE_PTE_PS64			BIT_ULL(8)
+#define XE_PTE_NULL			BIT_ULL(9)
+
+#define XE_PAGE_PRESENT			BIT_ULL(0)
+#define XE_PAGE_RW			BIT_ULL(1)
+
+#define XE_PL_SYSTEM		TTM_PL_SYSTEM
+#define XE_PL_TT		TTM_PL_TT
+#define XE_PL_VRAM0		TTM_PL_VRAM
+#define XE_PL_VRAM1		(XE_PL_VRAM0 + 1)
+#define XE_PL_STOLEN		(TTM_NUM_MEM_TYPES - 1)
+
+#define XE_BO_PROPS_INVALID	(-1)
+
+struct sg_table;
+
+struct xe_bo *xe_bo_alloc(void);
+void xe_bo_free(struct xe_bo *bo);
+
+struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+				     struct xe_tile *tile, struct dma_resv *resv,
+				     struct ttm_lru_bulk_move *bulk, size_t size,
+				     u16 cpu_caching, enum ttm_bo_type type,
+				     u32 flags);
+struct xe_bo *
+xe_bo_create_locked_range(struct xe_device *xe,
+			  struct xe_tile *tile, struct xe_vm *vm,
+			  size_t size, u64 start, u64 end,
+			  enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
+				  struct xe_vm *vm, size_t size,
+				  enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
+			   struct xe_vm *vm, size_t size,
+			   enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm, size_t size,
+				u16 cpu_caching,
+				enum ttm_bo_type type,
+				u32 flags);
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+				   struct xe_vm *vm, size_t size,
+				   enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
+				      struct xe_vm *vm, size_t size, u64 offset,
+				      enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+				     const void *data, size_t size,
+				     enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
+					   size_t size, u32 flags);
+struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
+					     const void *data, size_t size, u32 flags);
+
+int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+			      u32 bo_flags);
+
+static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo)
+{
+	return container_of(bo, struct xe_bo, ttm);
+}
+
+static inline struct xe_bo *gem_to_xe_bo(const struct drm_gem_object *obj)
+{
+	return container_of(obj, struct xe_bo, ttm.base);
+}
+
+#define xe_bo_device(bo) ttm_to_xe_device((bo)->ttm.bdev)
+
+static inline struct xe_bo *xe_bo_get(struct xe_bo *bo)
+{
+	if (bo)
+		drm_gem_object_get(&bo->ttm.base);
+
+	return bo;
+}
+
+static inline void xe_bo_put(struct xe_bo *bo)
+{
+	if (bo)
+		drm_gem_object_put(&bo->ttm.base);
+}
+
+static inline void __xe_bo_unset_bulk_move(struct xe_bo *bo)
+{
+	if (bo)
+		ttm_bo_set_bulk_move(&bo->ttm, NULL);
+}
+
+static inline void xe_bo_assert_held(struct xe_bo *bo)
+{
+	if (bo)
+		dma_resv_assert_held((bo)->ttm.base.resv);
+}
+
+int xe_bo_lock(struct xe_bo *bo, bool intr);
+
+void xe_bo_unlock(struct xe_bo *bo);
+
+static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
+{
+	if (bo) {
+		XE_WARN_ON(bo->vm && bo->ttm.base.resv != xe_vm_resv(bo->vm));
+		if (bo->vm)
+			xe_vm_assert_held(bo->vm);
+		else
+			dma_resv_unlock(bo->ttm.base.resv);
+	}
+}
+
+int xe_bo_pin_external(struct xe_bo *bo);
+int xe_bo_pin(struct xe_bo *bo);
+void xe_bo_unpin_external(struct xe_bo *bo);
+void xe_bo_unpin(struct xe_bo *bo);
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict);
+
+static inline bool xe_bo_is_pinned(struct xe_bo *bo)
+{
+	return bo->ttm.pin_count;
+}
+
+static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo)
+{
+	if (likely(bo)) {
+		xe_bo_lock(bo, false);
+		xe_bo_unpin(bo);
+		xe_bo_unlock(bo);
+
+		xe_bo_put(bo);
+	}
+}
+
+bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo);
+dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size);
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size);
+
+static inline dma_addr_t
+xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
+{
+	return xe_bo_addr(bo, 0, page_size);
+}
+
+static inline u32
+xe_bo_ggtt_addr(struct xe_bo *bo)
+{
+	XE_WARN_ON(bo->ggtt_node.size > bo->size);
+	XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32));
+	return bo->ggtt_node.start;
+}
+
+int xe_bo_vmap(struct xe_bo *bo);
+void xe_bo_vunmap(struct xe_bo *bo);
+
+bool mem_type_is_vram(u32 mem_type);
+bool xe_bo_is_vram(struct xe_bo *bo);
+bool xe_bo_is_stolen(struct xe_bo *bo);
+bool xe_bo_is_stolen_devmem(struct xe_bo *bo);
+uint64_t vram_region_gpu_offset(struct ttm_resource *res);
+
+bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
+
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type);
+int xe_bo_evict(struct xe_bo *bo, bool force_alloc);
+
+int xe_bo_evict_pinned(struct xe_bo *bo);
+int xe_bo_restore_pinned(struct xe_bo *bo);
+
+extern struct ttm_device_funcs xe_ttm_funcs;
+
+int xe_gem_create_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int xe_bo_dumb_create(struct drm_file *file_priv,
+		      struct drm_device *dev,
+		      struct drm_mode_create_dumb *args);
+
+bool xe_bo_needs_ccs_pages(struct xe_bo *bo);
+
+static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo)
+{
+	return PAGE_ALIGN(bo->ttm.base.size);
+}
+
+static inline bool xe_bo_has_pages(struct xe_bo *bo)
+{
+	if ((bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) ||
+	    xe_bo_is_vram(bo))
+		return true;
+
+	return false;
+}
+
+void __xe_bo_release_dummy(struct kref *kref);
+
+/**
+ * xe_bo_put_deferred() - Put a buffer object with delayed final freeing
+ * @bo: The bo to put.
+ * @deferred: List to which to add the buffer object if we cannot put, or
+ * NULL if the function is to put unconditionally.
+ *
+ * Since the final freeing of an object includes both sleeping and (!)
+ * memory allocation in the dma_resv individualization, it's not ok
+ * to put an object from atomic context nor from within a held lock
+ * tainted by reclaim. In such situations we want to defer the final
+ * freeing until we've exited the restricting context, or in the worst
+ * case to a workqueue.
+ * This function either puts the object if possible without the refcount
+ * reaching zero, or adds it to the @deferred list if that was not possible.
+ * The caller needs to follow up with a call to xe_bo_put_commit() to actually
+ * put the bo iff this function returns true. It's safe to always
+ * follow up with a call to xe_bo_put_commit().
+ * TODO: It's TTM that is the villain here. Perhaps TTM should add an
+ * interface like this.
+ *
+ * Return: true if @bo was the first object put on the @freed list,
+ * false otherwise.
+ */
+static inline bool
+xe_bo_put_deferred(struct xe_bo *bo, struct llist_head *deferred)
+{
+	if (!deferred) {
+		xe_bo_put(bo);
+		return false;
+	}
+
+	if (!kref_put(&bo->ttm.base.refcount, __xe_bo_release_dummy))
+		return false;
+
+	return llist_add(&bo->freed, deferred);
+}
+
+void xe_bo_put_commit(struct llist_head *deferred);
+
+struct sg_table *xe_bo_sg(struct xe_bo *bo);
+
+/*
+ * xe_sg_segment_size() - Provides upper limit for sg segment size.
+ * @dev: device pointer
+ *
+ * Returns the maximum segment size for the 'struct scatterlist'
+ * elements.
+ */
+static inline unsigned int xe_sg_segment_size(struct device *dev)
+{
+	struct scatterlist __maybe_unused sg;
+	size_t max = BIT_ULL(sizeof(sg.length) * 8) - 1;
+
+	max = min_t(size_t, max, dma_max_mapping_size(dev));
+
+	/*
+	 * The iommu_dma_map_sg() function ensures iova allocation doesn't
+	 * cross dma segment boundary. It does so by padding some sg elements.
+	 * This can cause overflow, ending up with sg->length being set to 0.
+	 * Avoid this by ensuring maximum segment size is half of 'max'
+	 * rounded down to PAGE_SIZE.
+	 */
+	return round_down(max / 2, PAGE_SIZE);
+}
+
+#define i915_gem_object_flush_if_display(obj)		((void)(obj))
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+/**
+ * xe_bo_is_mem_type - Whether the bo currently resides in the given
+ * TTM memory type
+ * @bo: The bo to check.
+ * @mem_type: The TTM memory type.
+ *
+ * Return: true iff the bo resides in @mem_type, false otherwise.
+ */
+static inline bool xe_bo_is_mem_type(struct xe_bo *bo, u32 mem_type)
+{
+	xe_bo_assert_held(bo);
+	return bo->ttm.resource->mem_type == mem_type;
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h
new file mode 100644
index 000000000000..f57d440cc95a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_doc.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_DOC_H_
+#define _XE_BO_DOC_H_
+
+/**
+ * DOC: Buffer Objects (BO)
+ *
+ * BO management
+ * =============
+ *
+ * TTM manages (placement, eviction, etc...) all BOs in XE.
+ *
+ * BO creation
+ * ===========
+ *
+ * Create a chunk of memory which can be used by the GPU. Placement rules
+ * (sysmem or vram region) passed in upon creation. TTM handles placement of BO
+ * and can trigger eviction of other BOs to make space for the new BO.
+ *
+ * Kernel BOs
+ * ----------
+ *
+ * A kernel BO is created as part of driver load (e.g. uC firmware images, GuC
+ * ADS, etc...) or a BO created as part of a user operation which requires
+ * a kernel BO (e.g. engine state, memory for page tables, etc...). These BOs
+ * are typically mapped in the GGTT (any kernel BOs aside memory for page tables
+ * are in the GGTT), are pinned (can't move or be evicted at runtime), have a
+ * vmap (XE can access the memory via xe_map layer) and have contiguous physical
+ * memory.
+ *
+ * More details of why kernel BOs are pinned and contiguous below.
+ *
+ * User BOs
+ * --------
+ *
+ * A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is
+ * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user
+ * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All
+ * user BOs are evictable and user BOs are never pinned by XE. The allocation of
+ * the backing store can be defered from creation time until first use which is
+ * either mmap, bind, or pagefault.
+ *
+ * Private BOs
+ * ~~~~~~~~~~~
+ *
+ * A private BO is a user BO created with a valid VM argument passed into the
+ * create IOCTL. If a BO is private it cannot be exported via prime FD and
+ * mappings can only be created for the BO within the VM it is tied to. Lastly,
+ * the BO dma-resv slots / lock point to the VM's dma-resv slots / lock (all
+ * private BOs to a VM share common dma-resv slots / lock).
+ *
+ * External BOs
+ * ~~~~~~~~~~~~
+ *
+ * An external BO is a user BO created with a NULL VM argument passed into the
+ * create IOCTL. An external BO can be shared with different UMDs / devices via
+ * prime FD and the BO can be mapped into multiple VMs. An external BO has its
+ * own unique dma-resv slots / lock. An external BO will be in an array of all
+ * VMs which has a mapping of the BO. This allows VMs to lookup and lock all
+ * external BOs mapped in the VM as needed.
+ *
+ * BO placement
+ * ~~~~~~~~~~~~
+ *
+ * When a user BO is created, a mask of valid placements is passed indicating
+ * which memory regions are considered valid.
+ *
+ * The memory region information is available via query uAPI (TODO: add link).
+ *
+ * BO validation
+ * =============
+ *
+ * BO validation (ttm_bo_validate) refers to ensuring a BO has a valid
+ * placement. If a BO was swapped to temporary storage, a validation call will
+ * trigger a move back to a valid (location where GPU can access BO) placement.
+ * Validation of a BO may evict other BOs to make room for the BO being
+ * validated.
+ *
+ * BO eviction / moving
+ * ====================
+ *
+ * All eviction (or in other words, moving a BO from one memory location to
+ * another) is routed through TTM with a callback into XE.
+ *
+ * Runtime eviction
+ * ----------------
+ *
+ * Runtime evictions refers to during normal operations where TTM decides it
+ * needs to move a BO. Typically this is because TTM needs to make room for
+ * another BO and the evicted BO is first BO on LRU list that is not locked.
+ *
+ * An example of this is a new BO which can only be placed in VRAM but there is
+ * not space in VRAM. There could be multiple BOs which have sysmem and VRAM
+ * placement rules which currently reside in VRAM, TTM trigger a will move of
+ * one (or multiple) of these BO(s) until there is room in VRAM to place the new
+ * BO. The evicted BO(s) are valid but still need new bindings before the BO
+ * used again (exec or compute mode rebind worker).
+ *
+ * Another example would be, TTM can't find a BO to evict which has another
+ * valid placement. In this case TTM will evict one (or multiple) unlocked BO(s)
+ * to a temporary unreachable (invalid) placement. The evicted BO(s) are invalid
+ * and before next use need to be moved to a valid placement and rebound.
+ *
+ * In both cases, moves of these BOs are scheduled behind the fences in the BO's
+ * dma-resv slots.
+ *
+ * WW locking tries to ensures if 2 VMs use 51% of the memory forward progress
+ * is made on both VMs.
+ *
+ * Runtime eviction uses per a GT migration engine (TODO: link to migration
+ * engine doc) to do a GPU memcpy from one location to another.
+ *
+ * Rebinds after runtime eviction
+ * ------------------------------
+ *
+ * When BOs are moved, every mapping (VMA) of the BO needs to rebound before
+ * the BO is used again. Every VMA is added to an evicted list of its VM when
+ * the BO is moved. This is safe because of the VM locking structure (TODO: link
+ * to VM locking doc). On the next use of a VM (exec or compute mode rebind
+ * worker) the evicted VMA list is checked and rebinds are triggered. In the
+ * case of faulting VM, the rebind is done in the page fault handler.
+ *
+ * Suspend / resume eviction of VRAM
+ * ---------------------------------
+ *
+ * During device suspend / resume VRAM may lose power which means the contents
+ * of VRAM's memory is blown away. Thus BOs present in VRAM at the time of
+ * suspend must be moved to sysmem in order for their contents to be saved.
+ *
+ * A simple TTM call (ttm_resource_manager_evict_all) can move all non-pinned
+ * (user) BOs to sysmem. External BOs that are pinned need to be manually
+ * evicted with a simple loop + xe_bo_evict call. It gets a little trickier
+ * with kernel BOs.
+ *
+ * Some kernel BOs are used by the GT migration engine to do moves, thus we
+ * can't move all of the BOs via the GT migration engine. For simplity, use a
+ * TTM memcpy (CPU) to move any kernel (pinned) BO on either suspend or resume.
+ *
+ * Some kernel BOs need to be restored to the exact same physical location. TTM
+ * makes this rather easy but the caveat is the memory must be contiguous. Again
+ * for simplity, we enforce that all kernel (pinned) BOs are contiguous and
+ * restored to the same physical location.
+ *
+ * Pinned external BOs in VRAM are restored on resume via the GPU.
+ *
+ * Rebinds after suspend / resume
+ * ------------------------------
+ *
+ * Most kernel BOs have GGTT mappings which must be restored during the resume
+ * process. All user BOs are rebound after validation on their next use.
+ *
+ * Future work
+ * ===========
+ *
+ * Trim the list of BOs which is saved / restored via TTM memcpy on suspend /
+ * resume. All we really need to save / restore via TTM memcpy is the memory
+ * required for the GuC to load and the memory for the GT migrate engine to
+ * operate.
+ *
+ * Do not require kernel BOs to be contiguous in physical memory / restored to
+ * the same physical address on resume. In all likelihood the only memory that
+ * needs to be restored to the same physical address is memory used for page
+ * tables. All of that memory is allocated 1 page at time so the contiguous
+ * requirement isn't needed. Some work on the vmap code would need to be done if
+ * kernel BOs are not contiguous too.
+ *
+ * Make some kernel BO evictable rather than pinned. An example of this would be
+ * engine state, in all likelihood if the dma-slots of these BOs where properly
+ * used rather than pinning we could safely evict + rebind these BOs as needed.
+ *
+ * Some kernel BOs do not need to be restored on resume (e.g. GuC ADS as that is
+ * repopulated on resume), add flag to mark such objects as no save / restore.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
new file mode 100644
index 000000000000..7a264a9ca06e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo_evict.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_ggtt.h"
+#include "xe_tile.h"
+
+/**
+ * xe_bo_evict_all - evict all BOs from VRAM
+ *
+ * @xe: xe device
+ *
+ * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next
+ * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU.
+ * All eviction magic done via TTM calls.
+ *
+ * Evict == move VRAM BOs to temporary (typically system) memory.
+ *
+ * This function should be called before the device goes into a suspend state
+ * where the VRAM loses power.
+ */
+int xe_bo_evict_all(struct xe_device *xe)
+{
+	struct ttm_device *bdev = &xe->ttm;
+	struct xe_bo *bo;
+	struct xe_tile *tile;
+	struct list_head still_in_list;
+	u32 mem_type;
+	u8 id;
+	int ret;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	/* User memory */
+	for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
+		struct ttm_resource_manager *man =
+			ttm_manager_type(bdev, mem_type);
+
+		if (man) {
+			ret = ttm_resource_manager_evict_all(bdev, man);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/* Pinned user memory in VRAM */
+	INIT_LIST_HEAD(&still_in_list);
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.external_vram,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &still_in_list);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, false);
+		ret = xe_bo_evict_pinned(bo);
+		xe_bo_unlock(bo);
+		xe_bo_put(bo);
+		if (ret) {
+			spin_lock(&xe->pinned.lock);
+			list_splice_tail(&still_in_list,
+					 &xe->pinned.external_vram);
+			spin_unlock(&xe->pinned.lock);
+			return ret;
+		}
+
+		spin_lock(&xe->pinned.lock);
+	}
+	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
+	spin_unlock(&xe->pinned.lock);
+
+	/*
+	 * Wait for all user BO to be evicted as those evictions depend on the
+	 * memory moved below.
+	 */
+	for_each_tile(tile, xe, id)
+		xe_tile_migrate_wait(tile);
+
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, false);
+		ret = xe_bo_evict_pinned(bo);
+		xe_bo_unlock(bo);
+		xe_bo_put(bo);
+		if (ret)
+			return ret;
+
+		spin_lock(&xe->pinned.lock);
+	}
+	spin_unlock(&xe->pinned.lock);
+
+	return 0;
+}
+
+/**
+ * xe_bo_restore_kernel - restore kernel BOs to VRAM
+ *
+ * @xe: xe device
+ *
+ * Move kernel BOs from temporary (typically system) memory to VRAM via CPU. All
+ * moves done via TTM calls.
+ *
+ * This function should be called early, before trying to init the GT, on device
+ * resume.
+ */
+int xe_bo_restore_kernel(struct xe_device *xe)
+{
+	struct xe_bo *bo;
+	int ret;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.evicted,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		xe_bo_get(bo);
+		list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, false);
+		ret = xe_bo_restore_pinned(bo);
+		xe_bo_unlock(bo);
+		if (ret) {
+			xe_bo_put(bo);
+			return ret;
+		}
+
+		if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
+			struct xe_tile *tile = bo->tile;
+
+			mutex_lock(&tile->mem.ggtt->lock);
+			xe_ggtt_map_bo(tile->mem.ggtt, bo);
+			mutex_unlock(&tile->mem.ggtt->lock);
+		}
+
+		/*
+		 * We expect validate to trigger a move VRAM and our move code
+		 * should setup the iosys map.
+		 */
+		xe_assert(xe, !iosys_map_is_null(&bo->vmap));
+		xe_assert(xe, xe_bo_is_vram(bo));
+
+		xe_bo_put(bo);
+
+		spin_lock(&xe->pinned.lock);
+	}
+	spin_unlock(&xe->pinned.lock);
+
+	return 0;
+}
+
+/**
+ * xe_bo_restore_user - restore pinned user BOs to VRAM
+ *
+ * @xe: xe device
+ *
+ * Move pinned user BOs from temporary (typically system) memory to VRAM via
+ * CPU. All moves done via TTM calls.
+ *
+ * This function should be called late, after GT init, on device resume.
+ */
+int xe_bo_restore_user(struct xe_device *xe)
+{
+	struct xe_bo *bo;
+	struct xe_tile *tile;
+	struct list_head still_in_list;
+	u8 id;
+	int ret;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	/* Pinned user memory in VRAM should be validated on resume */
+	INIT_LIST_HEAD(&still_in_list);
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		bo = list_first_entry_or_null(&xe->pinned.external_vram,
+					      typeof(*bo), pinned_link);
+		if (!bo)
+			break;
+		list_move_tail(&bo->pinned_link, &still_in_list);
+		xe_bo_get(bo);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, false);
+		ret = xe_bo_restore_pinned(bo);
+		xe_bo_unlock(bo);
+		xe_bo_put(bo);
+		if (ret) {
+			spin_lock(&xe->pinned.lock);
+			list_splice_tail(&still_in_list,
+					 &xe->pinned.external_vram);
+			spin_unlock(&xe->pinned.lock);
+			return ret;
+		}
+
+		spin_lock(&xe->pinned.lock);
+	}
+	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
+	spin_unlock(&xe->pinned.lock);
+
+	/* Wait for validate to complete */
+	for_each_tile(tile, xe, id)
+		xe_tile_migrate_wait(tile);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h
new file mode 100644
index 000000000000..746894798852
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_evict.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_EVICT_H_
+#define _XE_BO_EVICT_H_
+
+struct xe_device;
+
+int xe_bo_evict_all(struct xe_device *xe);
+int xe_bo_restore_kernel(struct xe_device *xe);
+int xe_bo_restore_user(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
new file mode 100644
index 000000000000..64c2249a4e40
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_TYPES_H_
+#define _XE_BO_TYPES_H_
+
+#include <linux/iosys-map.h>
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_bo.h>
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_placement.h>
+
+struct xe_device;
+struct xe_vm;
+
+#define XE_BO_MAX_PLACEMENTS	3
+
+/* TODO: To be selected with VM_MADVISE */
+#define	XE_BO_PRIORITY_NORMAL	1
+
+/** @xe_bo: XE buffer object */
+struct xe_bo {
+	/** @ttm: TTM base buffer object */
+	struct ttm_buffer_object ttm;
+	/** @size: Size of this buffer object */
+	size_t size;
+	/** @flags: flags for this buffer object */
+	u32 flags;
+	/** @vm: VM this BO is attached to, for extobj this will be NULL */
+	struct xe_vm *vm;
+	/** @tile: Tile this BO is attached to (kernel BO only) */
+	struct xe_tile *tile;
+	/** @placements: valid placements for this BO */
+	struct ttm_place placements[XE_BO_MAX_PLACEMENTS];
+	/** @placement: current placement for this BO */
+	struct ttm_placement placement;
+	/** @ggtt_node: GGTT node if this BO is mapped in the GGTT */
+	struct drm_mm_node ggtt_node;
+	/** @vmap: iosys map of this buffer */
+	struct iosys_map vmap;
+	/** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */
+	struct ttm_bo_kmap_obj kmap;
+	/** @pinned_link: link to present / evicted list of pinned BO */
+	struct list_head pinned_link;
+#ifdef CONFIG_PROC_FS
+	/**
+	 * @client: @xe_drm_client which created the bo
+	 */
+	struct xe_drm_client *client;
+	/**
+	 * @client_link: Link into @xe_drm_client.objects_list
+	 */
+	struct list_head client_link;
+#endif
+	/** @props: BO user controlled properties */
+	struct {
+		/** @preferred_mem: preferred memory class for this BO */
+		s16 preferred_mem_class;
+		/** @prefered_gt: preferred GT for this BO */
+		s16 preferred_gt;
+		/** @preferred_mem_type: preferred memory type */
+		s32 preferred_mem_type;
+		/**
+		 * @cpu_atomic: the CPU expects to do atomics operations to
+		 * this BO
+		 */
+		bool cpu_atomic;
+		/**
+		 * @device_atomic: the device expects to do atomics operations
+		 * to this BO
+		 */
+		bool device_atomic;
+	} props;
+	/** @freed: List node for delayed put. */
+	struct llist_node freed;
+	/** @created: Whether the bo has passed initial creation */
+	bool created;
+
+	/** @ccs_cleared */
+	bool ccs_cleared;
+
+	/**
+	 * @cpu_caching: CPU caching mode. Currently only used for userspace
+	 * objects.
+	 */
+	u16 cpu_caching;
+};
+
+#define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
+#define intel_bo_to_i915(bo) to_i915(intel_bo_to_drm_bo(bo)->dev)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
new file mode 100644
index 000000000000..c56fd7d59f05
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_debugfs.h"
+
+#include <linux/string_helpers.h>
+
+#include <drm/drm_debugfs.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt_debugfs.h"
+#include "xe_step.h"
+
+#ifdef CONFIG_DRM_XE_DEBUG
+#include "xe_bo_evict.h"
+#include "xe_migrate.h"
+#include "xe_vm.h"
+#endif
+
+#ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h> /* XXX: fault-inject.h is broken */
+DECLARE_FAULT_ATTR(gt_reset_failure);
+#endif
+
+static struct xe_device *node_to_xe(struct drm_info_node *node)
+{
+	return to_xe_device(node->minor->dev);
+}
+
+static int info(struct seq_file *m, void *data)
+{
+	struct xe_device *xe = node_to_xe(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+	struct xe_gt *gt;
+	u8 id;
+
+	drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100);
+	drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100);
+	drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n",
+		   xe_step_name(xe->info.step.graphics),
+		   xe_step_name(xe->info.step.media),
+		   xe_step_name(xe->info.step.display),
+		   xe_step_name(xe->info.step.basedie));
+	drm_printf(&p, "is_dgfx %s\n", str_yes_no(xe->info.is_dgfx));
+	drm_printf(&p, "platform %d\n", xe->info.platform);
+	drm_printf(&p, "subplatform %d\n",
+		   xe->info.subplatform > XE_SUBPLATFORM_NONE ? xe->info.subplatform : 0);
+	drm_printf(&p, "devid 0x%x\n", xe->info.devid);
+	drm_printf(&p, "revid %d\n", xe->info.revid);
+	drm_printf(&p, "tile_count %d\n", xe->info.tile_count);
+	drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level);
+	drm_printf(&p, "force_execlist %s\n", str_yes_no(xe->info.force_execlist));
+	drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs));
+	drm_printf(&p, "has_usm %s\n", str_yes_no(xe->info.has_usm));
+	for_each_gt(gt, xe, id) {
+		drm_printf(&p, "gt%d force wake %d\n", id,
+			   xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT));
+		drm_printf(&p, "gt%d engine_mask 0x%llx\n", id,
+			   gt->info.engine_mask);
+	}
+
+	return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+	{"info", info, 0},
+};
+
+static int forcewake_open(struct inode *inode, struct file *file)
+{
+	struct xe_device *xe = inode->i_private;
+	struct xe_gt *gt;
+	u8 id;
+
+	xe_device_mem_access_get(xe);
+
+	for_each_gt(gt, xe, id)
+		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+	return 0;
+}
+
+static int forcewake_release(struct inode *inode, struct file *file)
+{
+	struct xe_device *xe = inode->i_private;
+	struct xe_gt *gt;
+	u8 id;
+
+	for_each_gt(gt, xe, id)
+		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+static const struct file_operations forcewake_all_fops = {
+	.owner = THIS_MODULE,
+	.open = forcewake_open,
+	.release = forcewake_release,
+};
+
+void xe_debugfs_register(struct xe_device *xe)
+{
+	struct ttm_device *bdev = &xe->ttm;
+	struct drm_minor *minor = xe->drm.primary;
+	struct dentry *root = minor->debugfs_root;
+	struct ttm_resource_manager *man;
+	struct xe_gt *gt;
+	u32 mem_type;
+	u8 id;
+
+	drm_debugfs_create_files(debugfs_list,
+				 ARRAY_SIZE(debugfs_list),
+				 root, minor);
+
+	debugfs_create_file("forcewake_all", 0400, root, xe,
+			    &forcewake_all_fops);
+
+	for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
+		man = ttm_manager_type(bdev, mem_type);
+
+		if (man) {
+			char name[16];
+
+			sprintf(name, "vram%d_mm", mem_type - XE_PL_VRAM0);
+			ttm_resource_manager_create_debugfs(man, root, name);
+		}
+	}
+
+	man = ttm_manager_type(bdev, XE_PL_TT);
+	ttm_resource_manager_create_debugfs(man, root, "gtt_mm");
+
+	man = ttm_manager_type(bdev, XE_PL_STOLEN);
+	if (man)
+		ttm_resource_manager_create_debugfs(man, root, "stolen_mm");
+
+	for_each_gt(gt, xe, id)
+		xe_gt_debugfs_register(gt);
+
+#ifdef CONFIG_FAULT_INJECTION
+	fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure);
+#endif
+
+}
diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h
new file mode 100644
index 000000000000..715b8e2e0bd9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_debugfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DEBUGFS_H_
+#define _XE_DEBUGFS_H_
+
+struct xe_device;
+
+void xe_debugfs_register(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
new file mode 100644
index 000000000000..68abc0b195be
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_devcoredump.h"
+#include "xe_devcoredump_types.h"
+
+#include <linux/devcoredump.h>
+#include <generated/utsrelease.h>
+
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_submit.h"
+#include "xe_hw_engine.h"
+
+/**
+ * DOC: Xe device coredump
+ *
+ * Devices overview:
+ * Xe uses dev_coredump infrastructure for exposing the crash errors in a
+ * standardized way.
+ * devcoredump exposes a temporary device under /sys/class/devcoredump/
+ * which is linked with our card device directly.
+ * The core dump can be accessed either from
+ * /sys/class/drm/card<n>/device/devcoredump/ or from
+ * /sys/class/devcoredump/devcd<m> where
+ * /sys/class/devcoredump/devcd<m>/failing_device is a link to
+ * /sys/class/drm/card<n>/device/.
+ *
+ * Snapshot at hang:
+ * The 'data' file is printed with a drm_printer pointer at devcoredump read
+ * time. For this reason, we need to take snapshots from when the hang has
+ * happened, and not only when the user is reading the file. Otherwise the
+ * information is outdated since the resets might have happened in between.
+ *
+ * 'First' failure snapshot:
+ * In general, the first hang is the most critical one since the following hangs
+ * can be a consequence of the initial hang. For this reason we only take the
+ * snapshot of the 'first' failure and ignore subsequent calls of this function,
+ * at least while the coredump device is alive. Dev_coredump has a delayed work
+ * queue that will eventually delete the device and free all the dump
+ * information.
+ */
+
+#ifdef CONFIG_DEV_COREDUMP
+
+static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump)
+{
+	return container_of(coredump, struct xe_device, devcoredump);
+}
+
+static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
+{
+	return &q->gt->uc.guc;
+}
+
+static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
+				   size_t count, void *data, size_t datalen)
+{
+	struct xe_devcoredump *coredump = data;
+	struct xe_devcoredump_snapshot *ss;
+	struct drm_printer p;
+	struct drm_print_iterator iter;
+	struct timespec64 ts;
+	int i;
+
+	/* Our device is gone already... */
+	if (!data || !coredump_to_xe(coredump))
+		return -ENODEV;
+
+	iter.data = buffer;
+	iter.offset = 0;
+	iter.start = offset;
+	iter.remain = count;
+
+	ss = &coredump->snapshot;
+	p = drm_coredump_printer(&iter);
+
+	drm_printf(&p, "**** Xe Device Coredump ****\n");
+	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
+	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
+
+	ts = ktime_to_timespec64(ss->snapshot_time);
+	drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
+	ts = ktime_to_timespec64(ss->boot_time);
+	drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
+
+	drm_printf(&p, "\n**** GuC CT ****\n");
+	xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
+	xe_guc_exec_queue_snapshot_print(coredump->snapshot.ge, &p);
+
+	drm_printf(&p, "\n**** HW Engines ****\n");
+	for (i = 0; i < XE_NUM_HW_ENGINES; i++)
+		if (coredump->snapshot.hwe[i])
+			xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
+						    &p);
+
+	return count - iter.remain;
+}
+
+static void xe_devcoredump_free(void *data)
+{
+	struct xe_devcoredump *coredump = data;
+	int i;
+
+	/* Our device is gone. Nothing to do... */
+	if (!data || !coredump_to_xe(coredump))
+		return;
+
+	xe_guc_ct_snapshot_free(coredump->snapshot.ct);
+	xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge);
+	for (i = 0; i < XE_NUM_HW_ENGINES; i++)
+		if (coredump->snapshot.hwe[i])
+			xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
+
+	coredump->captured = false;
+	drm_info(&coredump_to_xe(coredump)->drm,
+		 "Xe device coredump has been deleted.\n");
+}
+
+static void devcoredump_snapshot(struct xe_devcoredump *coredump,
+				 struct xe_exec_queue *q)
+{
+	struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 adj_logical_mask = q->logical_mask;
+	u32 width_mask = (0x1 << q->width) - 1;
+	int i;
+	bool cookie;
+
+	ss->snapshot_time = ktime_get_real();
+	ss->boot_time = ktime_get_boottime();
+
+	cookie = dma_fence_begin_signalling();
+	for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
+		if (adj_logical_mask & BIT(i)) {
+			adj_logical_mask |= width_mask << i;
+			i += q->width;
+		} else {
+			++i;
+		}
+	}
+
+	xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+
+	coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
+	coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q);
+
+	for_each_hw_engine(hwe, q->gt, id) {
+		if (hwe->class != q->hwe->class ||
+		    !(BIT(hwe->logical_instance) & adj_logical_mask)) {
+			coredump->snapshot.hwe[id] = NULL;
+			continue;
+		}
+		coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe);
+	}
+
+	xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+	dma_fence_end_signalling(cookie);
+}
+
+/**
+ * xe_devcoredump - Take the required snapshots and initialize coredump device.
+ * @q: The faulty xe_exec_queue, where the issue was detected.
+ *
+ * This function should be called at the crash time within the serialized
+ * gt_reset. It is skipped if we still have the core dump device available
+ * with the information of the 'first' snapshot.
+ */
+void xe_devcoredump(struct xe_exec_queue *q)
+{
+	struct xe_device *xe = gt_to_xe(q->gt);
+	struct xe_devcoredump *coredump = &xe->devcoredump;
+
+	if (coredump->captured) {
+		drm_dbg(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n");
+		return;
+	}
+
+	coredump->captured = true;
+	devcoredump_snapshot(coredump, q);
+
+	drm_info(&xe->drm, "Xe device coredump has been created\n");
+	drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
+		 xe->drm.primary->index);
+
+	dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
+		      xe_devcoredump_read, xe_devcoredump_free);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
new file mode 100644
index 000000000000..6ac218a5c194
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DEVCOREDUMP_H_
+#define _XE_DEVCOREDUMP_H_
+
+struct xe_device;
+struct xe_exec_queue;
+
+#ifdef CONFIG_DEV_COREDUMP
+void xe_devcoredump(struct xe_exec_queue *q);
+#else
+static inline void xe_devcoredump(struct xe_exec_queue *q)
+{
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
new file mode 100644
index 000000000000..7fdad9c3d3dd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DEVCOREDUMP_TYPES_H_
+#define _XE_DEVCOREDUMP_TYPES_H_
+
+#include <linux/ktime.h>
+#include <linux/mutex.h>
+
+#include "xe_hw_engine_types.h"
+
+struct xe_device;
+
+/**
+ * struct xe_devcoredump_snapshot - Crash snapshot
+ *
+ * This struct contains all the useful information quickly captured at the time
+ * of the crash. So, any subsequent reads of the coredump points to a data that
+ * shows the state of the GPU of when the issue has happened.
+ */
+struct xe_devcoredump_snapshot {
+	/** @snapshot_time:  Time of this capture. */
+	ktime_t snapshot_time;
+	/** @boot_time:  Relative boot time so the uptime can be calculated. */
+	ktime_t boot_time;
+
+	/* GuC snapshots */
+	/** @ct: GuC CT snapshot */
+	struct xe_guc_ct_snapshot *ct;
+	/** @ge: Guc Engine snapshot */
+	struct xe_guc_submit_exec_queue_snapshot *ge;
+	/** @hwe: HW Engine snapshot array */
+	struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
+};
+
+/**
+ * struct xe_devcoredump - Xe devcoredump main structure
+ *
+ * This struct represents the live and active dev_coredump node.
+ * It is created/populated at the time of a crash/error. Then it
+ * is read later when user access the device coredump data file
+ * for reading the information.
+ */
+struct xe_devcoredump {
+	/** @xe: Xe device. */
+	struct xe_device *xe;
+	/** @captured: The snapshot of the first hang has already been taken. */
+	bool captured;
+	/** @snapshot: Snapshot is captured at time of the first crash */
+	struct xe_devcoredump_snapshot snapshot;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
new file mode 100644
index 000000000000..d9ae77fe7382
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_device.h"
+
+#include <linux/units.h>
+
+#include <drm/drm_aperture.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <drm/xe_drm.h>
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_debugfs.h"
+#include "xe_display.h"
+#include "xe_dma_buf.h"
+#include "xe_drm_client.h"
+#include "xe_drv.h"
+#include "xe_exec_queue.h"
+#include "xe_exec.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_irq.h"
+#include "xe_mmio.h"
+#include "xe_module.h"
+#include "xe_pat.h"
+#include "xe_pcode.h"
+#include "xe_pm.h"
+#include "xe_query.h"
+#include "xe_tile.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_ttm_sys_mgr.h"
+#include "xe_vm.h"
+#include "xe_wait_user_fence.h"
+#include "xe_hwmon.h"
+
+#ifdef CONFIG_LOCKDEP
+struct lockdep_map xe_device_mem_access_lockdep_map = {
+	.name = "xe_device_mem_access_lockdep_map"
+};
+#endif
+
+static int xe_file_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_drm_client *client;
+	struct xe_file *xef;
+	int ret = -ENOMEM;
+
+	xef = kzalloc(sizeof(*xef), GFP_KERNEL);
+	if (!xef)
+		return ret;
+
+	client = xe_drm_client_alloc();
+	if (!client) {
+		kfree(xef);
+		return ret;
+	}
+
+	xef->drm = file;
+	xef->client = client;
+	xef->xe = xe;
+
+	mutex_init(&xef->vm.lock);
+	xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
+
+	mutex_init(&xef->exec_queue.lock);
+	xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
+
+	spin_lock(&xe->clients.lock);
+	xe->clients.count++;
+	spin_unlock(&xe->clients.lock);
+
+	file->driver_priv = xef;
+	return 0;
+}
+
+static void device_kill_persistent_exec_queues(struct xe_device *xe,
+					       struct xe_file *xef);
+
+static void xe_file_close(struct drm_device *dev, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = file->driver_priv;
+	struct xe_vm *vm;
+	struct xe_exec_queue *q;
+	unsigned long idx;
+
+	mutex_lock(&xef->exec_queue.lock);
+	xa_for_each(&xef->exec_queue.xa, idx, q) {
+		xe_exec_queue_kill(q);
+		xe_exec_queue_put(q);
+	}
+	mutex_unlock(&xef->exec_queue.lock);
+	xa_destroy(&xef->exec_queue.xa);
+	mutex_destroy(&xef->exec_queue.lock);
+	device_kill_persistent_exec_queues(xe, xef);
+
+	mutex_lock(&xef->vm.lock);
+	xa_for_each(&xef->vm.xa, idx, vm)
+		xe_vm_close_and_put(vm);
+	mutex_unlock(&xef->vm.lock);
+	xa_destroy(&xef->vm.xa);
+	mutex_destroy(&xef->vm.lock);
+
+	spin_lock(&xe->clients.lock);
+	xe->clients.count--;
+	spin_unlock(&xe->clients.lock);
+
+	xe_drm_client_put(xef->client);
+	kfree(xef);
+}
+
+static const struct drm_ioctl_desc xe_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
+			  DRM_RENDER_ALLOW),
+};
+
+static const struct file_operations xe_driver_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release_noglobal,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = drm_gem_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+	.compat_ioctl = drm_compat_ioctl,
+	.llseek = noop_llseek,
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo = drm_show_fdinfo,
+#endif
+};
+
+static void xe_driver_release(struct drm_device *dev)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL);
+}
+
+static struct drm_driver driver = {
+	/* Don't use MTRRs here; the Xserver or userspace app should
+	 * deal with them for Intel hardware.
+	 */
+	.driver_features =
+	    DRIVER_GEM |
+	    DRIVER_RENDER | DRIVER_SYNCOBJ |
+	    DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
+	.open = xe_file_open,
+	.postclose = xe_file_close,
+
+	.gem_prime_import = xe_gem_prime_import,
+
+	.dumb_create = xe_bo_dumb_create,
+	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo = xe_drm_client_fdinfo,
+#endif
+	.release = &xe_driver_release,
+
+	.ioctls = xe_ioctls,
+	.num_ioctls = ARRAY_SIZE(xe_ioctls),
+	.fops = &xe_driver_fops,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static void xe_device_destroy(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (xe->ordered_wq)
+		destroy_workqueue(xe->ordered_wq);
+
+	if (xe->unordered_wq)
+		destroy_workqueue(xe->unordered_wq);
+
+	ttm_device_fini(&xe->ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+				   const struct pci_device_id *ent)
+{
+	struct xe_device *xe;
+	int err;
+
+	xe_display_driver_set_hooks(&driver);
+
+	err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
+	if (err)
+		return ERR_PTR(err);
+
+	xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
+	if (IS_ERR(xe))
+		return xe;
+
+	err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
+			      xe->drm.anon_inode->i_mapping,
+			      xe->drm.vma_offset_manager, false, false);
+	if (WARN_ON(err))
+		goto err;
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
+	if (err)
+		goto err;
+
+	xe->info.devid = pdev->device;
+	xe->info.revid = pdev->revision;
+	xe->info.force_execlist = xe_modparam.force_execlist;
+
+	spin_lock_init(&xe->irq.lock);
+	spin_lock_init(&xe->clients.lock);
+
+	init_waitqueue_head(&xe->ufence_wq);
+
+	drmm_mutex_init(&xe->drm, &xe->usm.lock);
+	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
+
+	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+		/* Trigger a large asid and an early asid wrap. */
+		u32 asid;
+
+		BUILD_BUG_ON(XE_MAX_ASID < 2);
+		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
+				      XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
+				      &xe->usm.next_asid, GFP_KERNEL);
+		drm_WARN_ON(&xe->drm, err);
+		if (err >= 0)
+			xa_erase(&xe->usm.asid_to_vm, asid);
+	}
+
+	drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock);
+	INIT_LIST_HEAD(&xe->persistent_engines.list);
+
+	spin_lock_init(&xe->pinned.lock);
+	INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
+	INIT_LIST_HEAD(&xe->pinned.external_vram);
+	INIT_LIST_HEAD(&xe->pinned.evicted);
+
+	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
+	xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
+	if (!xe->ordered_wq || !xe->unordered_wq) {
+		drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
+		err = -ENOMEM;
+		goto err;
+	}
+
+	err = xe_display_create(xe);
+	if (WARN_ON(err))
+		goto err;
+
+	return xe;
+
+err:
+	return ERR_PTR(err);
+}
+
+/*
+ * The driver-initiated FLR is the highest level of reset that we can trigger
+ * from within the driver. It is different from the PCI FLR in that it doesn't
+ * fully reset the SGUnit and doesn't modify the PCI config space and therefore
+ * it doesn't require a re-enumeration of the PCI BARs. However, the
+ * driver-initiated FLR does still cause a reset of both GT and display and a
+ * memory wipe of local and stolen memory, so recovery would require a full HW
+ * re-init and saving/restoring (or re-populating) the wiped memory. Since we
+ * perform the FLR as the very last action before releasing access to the HW
+ * during the driver release flow, we don't attempt recovery at all, because
+ * if/when a new instance of i915 is bound to the device it will do a full
+ * re-init anyway.
+ */
+static void xe_driver_flr(struct xe_device *xe)
+{
+	const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	int ret;
+
+	if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
+		drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
+		return;
+	}
+
+	drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
+
+	/*
+	 * Make sure any pending FLR requests have cleared by waiting for the
+	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
+	 * to make sure it's not still set from a prior attempt (it's a write to
+	 * clear bit).
+	 * Note that we should never be in a situation where a previous attempt
+	 * is still pending (unless the HW is totally dead), but better to be
+	 * safe in case something unexpected happens
+	 */
+	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+	if (ret) {
+		drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
+		return;
+	}
+	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+
+	/* Trigger the actual Driver-FLR */
+	xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);
+
+	/* Wait for hardware teardown to complete */
+	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
+	if (ret) {
+		drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
+		return;
+	}
+
+	/* Wait for hardware/firmware re-init to complete */
+	ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+			     flr_timeout, NULL, false);
+	if (ret) {
+		drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
+		return;
+	}
+
+	/* Clear sticky completion status */
+	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
+static void xe_driver_flr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+
+	if (xe->needs_flr_on_fini)
+		xe_driver_flr(xe);
+}
+
+static void xe_device_sanitize(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct xe_gt *gt;
+	u8 id;
+
+	for_each_gt(gt, xe, id)
+		xe_gt_sanitize(gt);
+}
+
+static int xe_set_dma_info(struct xe_device *xe)
+{
+	unsigned int mask_size = xe->info.dma_mask_size;
+	int err;
+
+	dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));
+
+	err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+	if (err)
+		goto mask_err;
+
+	err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+	if (err)
+		goto mask_err;
+
+	return 0;
+
+mask_err:
+	drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
+	return err;
+}
+
+/*
+ * Initialize MMIO resources that don't require any knowledge about tile count.
+ */
+int xe_device_probe_early(struct xe_device *xe)
+{
+	int err;
+
+	err = xe_mmio_init(xe);
+	if (err)
+		return err;
+
+	err = xe_mmio_root_tile_init(xe);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int xe_device_set_has_flat_ccs(struct  xe_device *xe)
+{
+	u32 reg;
+	int err;
+
+	if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
+		return 0;
+
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
+	xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);
+
+	if (!xe->info.has_flat_ccs)
+		drm_dbg(&xe->drm,
+			"Flat CCS has been disabled in bios, May lead to performance impact");
+
+	return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+int xe_device_probe(struct xe_device *xe)
+{
+	struct xe_tile *tile;
+	struct xe_gt *gt;
+	int err;
+	u8 id;
+
+	xe_pat_init_early(xe);
+
+	xe->info.mem_region_mask = 1;
+	err = xe_display_init_nommio(xe);
+	if (err)
+		return err;
+
+	err = xe_set_dma_info(xe);
+	if (err)
+		return err;
+
+	xe_mmio_probe_tiles(xe);
+
+	xe_ttm_sys_mgr_init(xe);
+
+	for_each_gt(gt, xe, id)
+		xe_force_wake_init_gt(gt, gt_to_fw(gt));
+
+	for_each_tile(tile, xe, id) {
+		err = xe_ggtt_init_early(tile->mem.ggtt);
+		if (err)
+			return err;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
+	if (err)
+		return err;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_pcode_probe(gt);
+		if (err)
+			return err;
+	}
+
+	err = xe_display_init_noirq(xe);
+	if (err)
+		return err;
+
+	err = xe_irq_install(xe);
+	if (err)
+		goto err;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_init_early(gt);
+		if (err)
+			goto err_irq_shutdown;
+	}
+
+	err = xe_device_set_has_flat_ccs(xe);
+	if (err)
+		return err;
+
+	err = xe_mmio_probe_vram(xe);
+	if (err)
+		goto err_irq_shutdown;
+
+	for_each_tile(tile, xe, id) {
+		err = xe_tile_init_noalloc(tile);
+		if (err)
+			goto err_irq_shutdown;
+	}
+
+	/* Allocate and map stolen after potential VRAM resize */
+	xe_ttm_stolen_mgr_init(xe);
+
+	/*
+	 * Now that GT is initialized (TTM in particular),
+	 * we can try to init display, and inherit the initial fb.
+	 * This is the reason the first allocation needs to be done
+	 * inside display.
+	 */
+	err = xe_display_init_noaccel(xe);
+	if (err)
+		goto err_irq_shutdown;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_init(gt);
+		if (err)
+			goto err_irq_shutdown;
+	}
+
+	xe_heci_gsc_init(xe);
+
+	err = xe_display_init(xe);
+	if (err)
+		goto err_irq_shutdown;
+
+	err = drm_dev_register(&xe->drm, 0);
+	if (err)
+		goto err_fini_display;
+
+	xe_display_register(xe);
+
+	xe_debugfs_register(xe);
+
+	xe_hwmon_register(xe);
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
+	if (err)
+		return err;
+
+	return 0;
+
+err_fini_display:
+	xe_display_driver_remove(xe);
+
+err_irq_shutdown:
+	xe_irq_shutdown(xe);
+err:
+	xe_display_fini(xe);
+	return err;
+}
+
+static void xe_device_remove_display(struct xe_device *xe)
+{
+	xe_display_unregister(xe);
+
+	drm_dev_unplug(&xe->drm);
+	xe_display_driver_remove(xe);
+}
+
+void xe_device_remove(struct xe_device *xe)
+{
+	xe_device_remove_display(xe);
+
+	xe_display_fini(xe);
+
+	xe_heci_gsc_fini(xe);
+
+	xe_irq_shutdown(xe);
+}
+
+void xe_device_shutdown(struct xe_device *xe)
+{
+}
+
+void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q)
+{
+	mutex_lock(&xe->persistent_engines.lock);
+	list_add_tail(&q->persistent.link, &xe->persistent_engines.list);
+	mutex_unlock(&xe->persistent_engines.lock);
+}
+
+void xe_device_remove_persistent_exec_queues(struct xe_device *xe,
+					     struct xe_exec_queue *q)
+{
+	mutex_lock(&xe->persistent_engines.lock);
+	if (!list_empty(&q->persistent.link))
+		list_del(&q->persistent.link);
+	mutex_unlock(&xe->persistent_engines.lock);
+}
+
+static void device_kill_persistent_exec_queues(struct xe_device *xe,
+					       struct xe_file *xef)
+{
+	struct xe_exec_queue *q, *next;
+
+	mutex_lock(&xe->persistent_engines.lock);
+	list_for_each_entry_safe(q, next, &xe->persistent_engines.list,
+				 persistent.link)
+		if (q->persistent.xef == xef) {
+			xe_exec_queue_kill(q);
+			list_del_init(&q->persistent.link);
+		}
+	mutex_unlock(&xe->persistent_engines.lock);
+}
+
+void xe_device_wmb(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+
+	wmb();
+	if (IS_DGFX(xe))
+		xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0);
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+{
+	return xe_device_has_flat_ccs(xe) ?
+		DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
+}
+
+bool xe_device_mem_access_ongoing(struct xe_device *xe)
+{
+	if (xe_pm_read_callback_task(xe) != NULL)
+		return true;
+
+	return atomic_read(&xe->mem_access.ref);
+}
+
+void xe_device_assert_mem_access(struct xe_device *xe)
+{
+	XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
+}
+
+bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe)
+{
+	bool active;
+
+	if (xe_pm_read_callback_task(xe) == current)
+		return true;
+
+	active = xe_pm_runtime_get_if_active(xe);
+	if (active) {
+		int ref = atomic_inc_return(&xe->mem_access.ref);
+
+		xe_assert(xe, ref != S32_MAX);
+	}
+
+	return active;
+}
+
+void xe_device_mem_access_get(struct xe_device *xe)
+{
+	int ref;
+
+	/*
+	 * This looks racy, but should be fine since the pm_callback_task only
+	 * transitions from NULL -> current (and back to NULL again), during the
+	 * runtime_resume() or runtime_suspend() callbacks, for which there can
+	 * only be a single one running for our device. We only need to prevent
+	 * recursively calling the runtime_get or runtime_put from those
+	 * callbacks, as well as preventing triggering any access_ongoing
+	 * asserts.
+	 */
+	if (xe_pm_read_callback_task(xe) == current)
+		return;
+
+	/*
+	 * Since the resume here is synchronous it can be quite easy to deadlock
+	 * if we are not careful. Also in practice it might be quite timing
+	 * sensitive to ever see the 0 -> 1 transition with the callers locks
+	 * held, so deadlocks might exist but are hard for lockdep to ever see.
+	 * With this in mind, help lockdep learn about the potentially scary
+	 * stuff that can happen inside the runtime_resume callback by acquiring
+	 * a dummy lock (it doesn't protect anything and gets compiled out on
+	 * non-debug builds).  Lockdep then only needs to see the
+	 * mem_access_lockdep_map -> runtime_resume callback once, and then can
+	 * hopefully validate all the (callers_locks) -> mem_access_lockdep_map.
+	 * For example if the (callers_locks) are ever grabbed in the
+	 * runtime_resume callback, lockdep should give us a nice splat.
+	 */
+	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+	lock_map_release(&xe_device_mem_access_lockdep_map);
+
+	xe_pm_runtime_get(xe);
+	ref = atomic_inc_return(&xe->mem_access.ref);
+
+	xe_assert(xe, ref != S32_MAX);
+
+}
+
+void xe_device_mem_access_put(struct xe_device *xe)
+{
+	int ref;
+
+	if (xe_pm_read_callback_task(xe) == current)
+		return;
+
+	ref = atomic_dec_return(&xe->mem_access.ref);
+	xe_pm_runtime_put(xe);
+
+	xe_assert(xe, ref >= 0);
+}
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
new file mode 100644
index 000000000000..3da83b233206
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_DEVICE_H_
+#define _XE_DEVICE_H_
+
+struct xe_exec_queue;
+struct xe_file;
+
+#include <drm/drm_util.h>
+
+#include "regs/xe_gpu_commands.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_macros.h"
+
+#ifdef CONFIG_LOCKDEP
+extern struct lockdep_map xe_device_mem_access_lockdep_map;
+#endif
+
+static inline struct xe_device *to_xe_device(const struct drm_device *dev)
+{
+	return container_of(dev, struct xe_device, drm);
+}
+
+static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev)
+{
+	return pci_get_drvdata(pdev);
+}
+
+static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm)
+{
+	return container_of(ttm, struct xe_device, ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+				   const struct pci_device_id *ent);
+int xe_device_probe_early(struct xe_device *xe);
+int xe_device_probe(struct xe_device *xe);
+void xe_device_remove(struct xe_device *xe);
+void xe_device_shutdown(struct xe_device *xe);
+
+void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q);
+void xe_device_remove_persistent_exec_queues(struct xe_device *xe,
+					     struct xe_exec_queue *q);
+
+void xe_device_wmb(struct xe_device *xe);
+
+static inline struct xe_file *to_xe_file(const struct drm_file *file)
+{
+	return file->driver_priv;
+}
+
+static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe)
+{
+	return &xe->tiles[0];
+}
+
+#define XE_MAX_GT_PER_TILE 2
+
+static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id)
+{
+	if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id > XE_MAX_GT_PER_TILE))
+		gt_id = 0;
+
+	return gt_id ? tile->media_gt : tile->primary_gt;
+}
+
+static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
+{
+	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
+	struct xe_gt *gt;
+
+	/*
+	 * FIXME: This only works for now because multi-tile and standalone
+	 * media are mutually exclusive on the platforms we have today.
+	 *
+	 * id => GT mapping may change once we settle on how we want to handle
+	 * our UAPI.
+	 */
+	if (MEDIA_VER(xe) >= 13) {
+		gt = xe_tile_get_gt(root_tile, gt_id);
+	} else {
+		if (drm_WARN_ON(&xe->drm, gt_id > XE_MAX_TILES_PER_DEVICE))
+			gt_id = 0;
+
+		gt = xe->tiles[gt_id].primary_gt;
+	}
+
+	if (!gt)
+		return NULL;
+
+	drm_WARN_ON(&xe->drm, gt->info.id != gt_id);
+	drm_WARN_ON(&xe->drm, gt->info.type == XE_GT_TYPE_UNINITIALIZED);
+
+	return gt;
+}
+
+/*
+ * Provide a GT structure suitable for performing non-GT MMIO operations against
+ * the primary tile.  Primarily intended for early tile initialization, display
+ * handling, top-most interrupt enable/disable, etc.  Since anything using the
+ * MMIO handle returned by this function doesn't need GSI offset translation,
+ * we'll return the primary GT from the root tile.
+ *
+ * FIXME: Fix the driver design so that 'gt' isn't the target of all MMIO
+ * operations.
+ *
+ * Returns the primary gt of the root tile.
+ */
+static inline struct xe_gt *xe_root_mmio_gt(struct xe_device *xe)
+{
+	return xe_device_get_root_tile(xe)->primary_gt;
+}
+
+static inline bool xe_device_uc_enabled(struct xe_device *xe)
+{
+	return !xe->info.force_execlist;
+}
+
+#define for_each_tile(tile__, xe__, id__) \
+	for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__)++) \
+		for_each_if((tile__) = &(xe__)->tiles[(id__)])
+
+#define for_each_remote_tile(tile__, xe__, id__) \
+	for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \
+		for_each_if((tile__) = &(xe__)->tiles[(id__)])
+
+/*
+ * FIXME: This only works for now since multi-tile and standalone media
+ * happen to be mutually exclusive.  Future platforms may change this...
+ */
+#define for_each_gt(gt__, xe__, id__) \
+	for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \
+		for_each_if((gt__) = xe_device_get_gt((xe__), (id__)))
+
+static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
+{
+	return &gt->mmio.fw;
+}
+
+void xe_device_mem_access_get(struct xe_device *xe);
+bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe);
+void xe_device_mem_access_put(struct xe_device *xe);
+
+void xe_device_assert_mem_access(struct xe_device *xe);
+bool xe_device_mem_access_ongoing(struct xe_device *xe);
+
+static inline bool xe_device_in_fault_mode(struct xe_device *xe)
+{
+	return xe->usm.num_vm_in_fault_mode != 0;
+}
+
+static inline bool xe_device_in_non_fault_mode(struct xe_device *xe)
+{
+	return xe->usm.num_vm_in_non_fault_mode != 0;
+}
+
+static inline bool xe_device_has_flat_ccs(struct xe_device *xe)
+{
+	return xe->info.has_flat_ccs;
+}
+
+static inline bool xe_device_has_sriov(struct xe_device *xe)
+{
+	return xe->info.has_sriov;
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c
new file mode 100644
index 000000000000..99113a5a2b84
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/pci.h>
+#include <linux/sysfs.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_device_sysfs.h"
+#include "xe_pm.h"
+
+/**
+ * DOC: Xe device sysfs
+ * Xe driver requires exposing certain tunable knobs controlled by user space for
+ * each graphics device. Considering this, we need to add sysfs attributes at device
+ * level granularity.
+ * These sysfs attributes will be available under pci device kobj directory.
+ *
+ * vram_d3cold_threshold - Report/change vram used threshold(in MB) below
+ * which vram save/restore is permissible during runtime D3cold entry/exit.
+ */
+
+static ssize_t
+vram_d3cold_threshold_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	int ret;
+
+	if (!xe)
+		return -EINVAL;
+
+	ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold);
+
+	return ret;
+}
+
+static ssize_t
+vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,
+			    const char *buff, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	u32 vram_d3cold_threshold;
+	int ret;
+
+	if (!xe)
+		return -EINVAL;
+
+	ret = kstrtou32(buff, 0, &vram_d3cold_threshold);
+	if (ret)
+		return ret;
+
+	drm_dbg(&xe->drm, "vram_d3cold_threshold: %u\n", vram_d3cold_threshold);
+
+	ret = xe_pm_set_vram_threshold(xe, vram_d3cold_threshold);
+
+	return ret ?: count;
+}
+
+static DEVICE_ATTR_RW(vram_d3cold_threshold);
+
+static void xe_device_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+
+	sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
+}
+
+void xe_device_sysfs_init(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+	int ret;
+
+	ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
+	if (ret) {
+		drm_warn(&xe->drm, "Failed to create sysfs file\n");
+		return;
+	}
+
+	ret = drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe);
+	if (ret)
+		drm_warn(&xe->drm, "Failed to add sysfs fini drm action\n");
+}
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.h b/drivers/gpu/drm/xe/xe_device_sysfs.h
new file mode 100644
index 000000000000..38b240684bee
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DEVICE_SYSFS_H_
+#define _XE_DEVICE_SYSFS_H_
+
+struct xe_device;
+
+void xe_device_sysfs_init(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
new file mode 100644
index 000000000000..c45ef17b3473
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -0,0 +1,545 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022-2023 Intel Corporation
+ */
+
+#ifndef _XE_DEVICE_TYPES_H_
+#define _XE_DEVICE_TYPES_H_
+
+#include <linux/pci.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
+
+#include "xe_devcoredump_types.h"
+#include "xe_heci_gsc.h"
+#include "xe_gt_types.h"
+#include "xe_lmtt_types.h"
+#include "xe_platform_types.h"
+#include "xe_pt_types.h"
+#include "xe_sriov_types.h"
+#include "xe_step_types.h"
+
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+#include "soc/intel_pch.h"
+#include "intel_display_core.h"
+#include "intel_display_device.h"
+#endif
+
+struct xe_ggtt;
+struct xe_pat_ops;
+
+#define XE_BO_INVALID_OFFSET	LONG_MAX
+
+#define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100)
+#define MEDIA_VER(xe) ((xe)->info.media_verx100 / 100)
+#define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100)
+#define MEDIA_VERx100(xe) ((xe)->info.media_verx100)
+#define IS_DGFX(xe) ((xe)->info.is_dgfx)
+#define HAS_HECI_GSCFI(xe) ((xe)->info.has_heci_gscfi)
+
+#define XE_VRAM_FLAGS_NEED64K		BIT(0)
+
+#define XE_GT0		0
+#define XE_GT1		1
+#define XE_MAX_TILES_PER_DEVICE	(XE_GT1 + 1)
+
+#define XE_MAX_ASID	(BIT(20))
+
+#define IS_PLATFORM_STEP(_xe, _platform, min_step, max_step)	\
+	((_xe)->info.platform == (_platform) &&			\
+	 (_xe)->info.step.graphics >= (min_step) &&		\
+	 (_xe)->info.step.graphics < (max_step))
+#define IS_SUBPLATFORM_STEP(_xe, _platform, sub, min_step, max_step)	\
+	((_xe)->info.platform == (_platform) &&				\
+	 (_xe)->info.subplatform == (sub) &&				\
+	 (_xe)->info.step.graphics >= (min_step) &&			\
+	 (_xe)->info.step.graphics < (max_step))
+
+#define tile_to_xe(tile__)								\
+	_Generic(tile__,								\
+		 const struct xe_tile * : (const struct xe_device *)((tile__)->xe),	\
+		 struct xe_tile * : (tile__)->xe)
+
+/**
+ * struct xe_mem_region - memory region structure
+ * This is used to describe a memory region in xe
+ * device, such as HBM memory or CXL extension memory.
+ */
+struct xe_mem_region {
+	/** @io_start: IO start address of this VRAM instance */
+	resource_size_t io_start;
+	/**
+	 * @io_size: IO size of this VRAM instance
+	 *
+	 * This represents how much of this VRAM we can access
+	 * via the CPU through the VRAM BAR. This can be smaller
+	 * than @usable_size, in which case only part of VRAM is CPU
+	 * accessible (typically the first 256M). This
+	 * configuration is known as small-bar.
+	 */
+	resource_size_t io_size;
+	/** @dpa_base: This memory regions's DPA (device physical address) base */
+	resource_size_t dpa_base;
+	/**
+	 * @usable_size: usable size of VRAM
+	 *
+	 * Usable size of VRAM excluding reserved portions
+	 * (e.g stolen mem)
+	 */
+	resource_size_t usable_size;
+	/**
+	 * @actual_physical_size: Actual VRAM size
+	 *
+	 * Actual VRAM size including reserved portions
+	 * (e.g stolen mem)
+	 */
+	resource_size_t actual_physical_size;
+	/** @mapping: pointer to VRAM mappable space */
+	void *__iomem mapping;
+};
+
+/**
+ * struct xe_tile - hardware tile structure
+ *
+ * From a driver perspective, a "tile" is effectively a complete GPU, containing
+ * an SGunit, 1-2 GTs, and (for discrete platforms) VRAM.
+ *
+ * Multi-tile platforms effectively bundle multiple GPUs behind a single PCI
+ * device and designate one "root" tile as being responsible for external PCI
+ * communication.  PCI BAR0 exposes the GGTT and MMIO register space for each
+ * tile in a stacked layout, and PCI BAR2 exposes the local memory associated
+ * with each tile similarly.  Device-wide interrupts can be enabled/disabled
+ * at the root tile, and the MSTR_TILE_INTR register will report which tiles
+ * have interrupts that need servicing.
+ */
+struct xe_tile {
+	/** @xe: Backpointer to tile's PCI device */
+	struct xe_device *xe;
+
+	/** @id: ID of the tile */
+	u8 id;
+
+	/**
+	 * @primary_gt: Primary GT
+	 */
+	struct xe_gt *primary_gt;
+
+	/**
+	 * @media_gt: Media GT
+	 *
+	 * Only present on devices with media version >= 13.
+	 */
+	struct xe_gt *media_gt;
+
+	/**
+	 * @mmio: MMIO info for a tile.
+	 *
+	 * Each tile has its own 16MB space in BAR0, laid out as:
+	 * * 0-4MB: registers
+	 * * 4MB-8MB: reserved
+	 * * 8MB-16MB: global GTT
+	 */
+	struct {
+		/** @size: size of tile's MMIO space */
+		size_t size;
+
+		/** @regs: pointer to tile's MMIO space (starting with registers) */
+		void *regs;
+	} mmio;
+
+	/**
+	 * @mmio_ext: MMIO-extension info for a tile.
+	 *
+	 * Each tile has its own additional 256MB (28-bit) MMIO-extension space.
+	 */
+	struct {
+		/** @size: size of tile's additional MMIO-extension space */
+		size_t size;
+
+		/** @regs: pointer to tile's additional MMIO-extension space */
+		void *regs;
+	} mmio_ext;
+
+	/** @mem: memory management info for tile */
+	struct {
+		/**
+		 * @vram: VRAM info for tile.
+		 *
+		 * Although VRAM is associated with a specific tile, it can
+		 * still be accessed by all tiles' GTs.
+		 */
+		struct xe_mem_region vram;
+
+		/** @vram_mgr: VRAM TTM manager */
+		struct xe_ttm_vram_mgr *vram_mgr;
+
+		/** @ggtt: Global graphics translation table */
+		struct xe_ggtt *ggtt;
+
+		/**
+		 * @kernel_bb_pool: Pool from which batchbuffers are allocated.
+		 *
+		 * Media GT shares a pool with its primary GT.
+		 */
+		struct xe_sa_manager *kernel_bb_pool;
+	} mem;
+
+	/** @sriov: tile level virtualization data */
+	union {
+		struct {
+			/** @sriov.pf.lmtt: Local Memory Translation Table. */
+			struct xe_lmtt lmtt;
+		} pf;
+	} sriov;
+
+	/** @migrate: Migration helper for vram blits and clearing */
+	struct xe_migrate *migrate;
+
+	/** @sysfs: sysfs' kobj used by xe_tile_sysfs */
+	struct kobject *sysfs;
+};
+
+/**
+ * struct xe_device - Top level struct of XE device
+ */
+struct xe_device {
+	/** @drm: drm device */
+	struct drm_device drm;
+
+	/** @devcoredump: device coredump */
+	struct xe_devcoredump devcoredump;
+
+	/** @info: device info */
+	struct intel_device_info {
+		/** @graphics_name: graphics IP name */
+		const char *graphics_name;
+		/** @media_name: media IP name */
+		const char *media_name;
+		/** @tile_mmio_ext_size: size of MMIO extension space, per-tile */
+		u32 tile_mmio_ext_size;
+		/** @graphics_verx100: graphics IP version */
+		u32 graphics_verx100;
+		/** @media_verx100: media IP version */
+		u32 media_verx100;
+		/** @mem_region_mask: mask of valid memory regions */
+		u32 mem_region_mask;
+		/** @platform: XE platform enum */
+		enum xe_platform platform;
+		/** @subplatform: XE subplatform enum */
+		enum xe_subplatform subplatform;
+		/** @devid: device ID */
+		u16 devid;
+		/** @revid: device revision */
+		u8 revid;
+		/** @step: stepping information for each IP */
+		struct xe_step_info step;
+		/** @dma_mask_size: DMA address bits */
+		u8 dma_mask_size;
+		/** @vram_flags: Vram flags */
+		u8 vram_flags;
+		/** @tile_count: Number of tiles */
+		u8 tile_count;
+		/** @gt_count: Total number of GTs for entire device */
+		u8 gt_count;
+		/** @vm_max_level: Max VM level */
+		u8 vm_max_level;
+		/** @va_bits: Maximum bits of a virtual address */
+		u8 va_bits;
+
+		/** @is_dgfx: is discrete device */
+		u8 is_dgfx:1;
+		/** @has_asid: Has address space ID */
+		u8 has_asid:1;
+		/** @force_execlist: Forced execlist submission */
+		u8 force_execlist:1;
+		/** @has_flat_ccs: Whether flat CCS metadata is used */
+		u8 has_flat_ccs:1;
+		/** @has_llc: Device has a shared CPU+GPU last level cache */
+		u8 has_llc:1;
+		/** @has_mmio_ext: Device has extra MMIO address range */
+		u8 has_mmio_ext:1;
+		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
+		u8 has_range_tlb_invalidation:1;
+		/** @has_sriov: Supports SR-IOV */
+		u8 has_sriov:1;
+		/** @has_usm: Device has unified shared memory support */
+		u8 has_usm:1;
+		/** @enable_display: display enabled */
+		u8 enable_display:1;
+		/** @skip_mtcfg: skip Multi-Tile configuration from MTCFG register */
+		u8 skip_mtcfg:1;
+		/** @skip_pcode: skip access to PCODE uC */
+		u8 skip_pcode:1;
+		/** @has_heci_gscfi: device has heci gscfi */
+		u8 has_heci_gscfi:1;
+		/** @skip_guc_pc: Skip GuC based PM feature init */
+		u8 skip_guc_pc:1;
+
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+		struct {
+			u32 rawclk_freq;
+		} i915_runtime;
+#endif
+	} info;
+
+	/** @irq: device interrupt state */
+	struct {
+		/** @lock: lock for processing irq's on this device */
+		spinlock_t lock;
+
+		/** @enabled: interrupts enabled on this device */
+		bool enabled;
+	} irq;
+
+	/** @ttm: ttm device */
+	struct ttm_device ttm;
+
+	/** @mmio: mmio info for device */
+	struct {
+		/** @size: size of MMIO space for device */
+		size_t size;
+		/** @regs: pointer to MMIO space for device */
+		void *regs;
+	} mmio;
+
+	/** @mem: memory info for device */
+	struct {
+		/** @vram: VRAM info for device */
+		struct xe_mem_region vram;
+		/** @sys_mgr: system TTM manager */
+		struct ttm_resource_manager sys_mgr;
+	} mem;
+
+	/** @sriov: device level virtualization data */
+	struct {
+		/** @sriov.__mode: SR-IOV mode (Don't access directly!) */
+		enum xe_sriov_mode __mode;
+	} sriov;
+
+	/** @clients: drm clients info */
+	struct {
+		/** @lock: Protects drm clients info */
+		spinlock_t lock;
+
+		/** @count: number of drm clients */
+		u64 count;
+	} clients;
+
+	/** @usm: unified memory state */
+	struct {
+		/** @asid: convert a ASID to VM */
+		struct xarray asid_to_vm;
+		/** @next_asid: next ASID, used to cyclical alloc asids */
+		u32 next_asid;
+		/** @num_vm_in_fault_mode: number of VM in fault mode */
+		u32 num_vm_in_fault_mode;
+		/** @num_vm_in_non_fault_mode: number of VM in non-fault mode */
+		u32 num_vm_in_non_fault_mode;
+		/** @lock: protects UM state */
+		struct mutex lock;
+	} usm;
+
+	/** @persistent_engines: engines that are closed but still running */
+	struct {
+		/** @lock: protects persistent engines */
+		struct mutex lock;
+		/** @list: list of persistent engines */
+		struct list_head list;
+	} persistent_engines;
+
+	/** @pinned: pinned BO state */
+	struct {
+		/** @lock: protected pinned BO list state */
+		spinlock_t lock;
+		/** @evicted: pinned kernel BO that are present */
+		struct list_head kernel_bo_present;
+		/** @evicted: pinned BO that have been evicted */
+		struct list_head evicted;
+		/** @external_vram: pinned external BO in vram*/
+		struct list_head external_vram;
+	} pinned;
+
+	/** @ufence_wq: user fence wait queue */
+	wait_queue_head_t ufence_wq;
+
+	/** @ordered_wq: used to serialize compute mode resume */
+	struct workqueue_struct *ordered_wq;
+
+	/** @unordered_wq: used to serialize unordered work, mostly display */
+	struct workqueue_struct *unordered_wq;
+
+	/** @tiles: device tiles */
+	struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE];
+
+	/**
+	 * @mem_access: keep track of memory access in the device, possibly
+	 * triggering additional actions when they occur.
+	 */
+	struct {
+		/** @ref: ref count of memory accesses */
+		atomic_t ref;
+	} mem_access;
+
+	/**
+	 * @pat: Encapsulate PAT related stuff
+	 */
+	struct {
+		/** Internal operations to abstract platforms */
+		const struct xe_pat_ops *ops;
+		/** PAT table to program in the HW */
+		const struct xe_pat_table_entry *table;
+		/** Number of PAT entries */
+		int n_entries;
+		u32 idx[__XE_CACHE_LEVEL_COUNT];
+	} pat;
+
+	/** @d3cold: Encapsulate d3cold related stuff */
+	struct {
+		/** capable: Indicates if root port is d3cold capable */
+		bool capable;
+
+		/** @allowed: Indicates if d3cold is a valid device state */
+		bool allowed;
+
+		/** @power_lost: Indicates if card has really lost power. */
+		bool power_lost;
+
+		/**
+		 * @vram_threshold:
+		 *
+		 * This represents the permissible threshold(in megabytes)
+		 * for vram save/restore. d3cold will be disallowed,
+		 * when vram_usages is above or equals the threshold value
+		 * to avoid the vram save/restore latency.
+		 * Default threshold value is 300mb.
+		 */
+		u32 vram_threshold;
+		/** @lock: protect vram_threshold */
+		struct mutex lock;
+	} d3cold;
+
+	/**
+	 * @pm_callback_task: Track the active task that is running in either
+	 * the runtime_suspend or runtime_resume callbacks.
+	 */
+	struct task_struct *pm_callback_task;
+
+	/** @hwmon: hwmon subsystem integration */
+	struct xe_hwmon *hwmon;
+
+	/** @heci_gsc: graphics security controller */
+	struct xe_heci_gsc heci_gsc;
+
+	/** @needs_flr_on_fini: requests function-reset on fini */
+	bool needs_flr_on_fini;
+
+	/* private: */
+
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+	/*
+	 * Any fields below this point are the ones used by display.
+	 * They are temporarily added here so xe_device can be desguised as
+	 * drm_i915_private during build. After cleanup these should go away,
+	 * migrating to the right sub-structs
+	 */
+	struct intel_display display;
+	enum intel_pch pch_type;
+	u16 pch_id;
+
+	struct dram_info {
+		bool wm_lv_0_adjust_needed;
+		u8 num_channels;
+		bool symmetric_memory;
+		enum intel_dram_type {
+			INTEL_DRAM_UNKNOWN,
+			INTEL_DRAM_DDR3,
+			INTEL_DRAM_DDR4,
+			INTEL_DRAM_LPDDR3,
+			INTEL_DRAM_LPDDR4,
+			INTEL_DRAM_DDR5,
+			INTEL_DRAM_LPDDR5,
+		} type;
+		u8 num_qgv_points;
+		u8 num_psf_gv_points;
+	} dram_info;
+
+	/*
+	 * edram size in MB.
+	 * Cannot be determined by PCIID. You must always read a register.
+	 */
+	u32 edram_size_mb;
+
+	/* To shut up runtime pm macros.. */
+	struct xe_runtime_pm {} runtime_pm;
+
+	/* For pcode */
+	struct mutex sb_lock;
+
+	/* Should be in struct intel_display */
+	u32 skl_preferred_vco_freq, max_dotclk_freq, hti_state;
+	u8 snps_phy_failed_calibration;
+	struct drm_atomic_state *modeset_restore_state;
+	struct list_head global_obj_list;
+
+	union {
+		/* only to allow build, not used functionally */
+		u32 irq_mask;
+		u32 de_irq_mask[I915_MAX_PIPES];
+	};
+	u32 pipestat_irq_mask[I915_MAX_PIPES];
+
+	bool display_irqs_enabled;
+	u32 enabled_irq_mask;
+
+	struct intel_uncore {
+		spinlock_t lock;
+	} uncore;
+
+	/* only to allow build, not used functionally */
+	struct {
+		unsigned int hpll_freq;
+		unsigned int czclk_freq;
+		unsigned int fsb_freq, mem_freq, is_ddr3;
+		u8 vblank_enabled;
+	};
+	struct {
+		const char *dmc_firmware_path;
+	} params;
+
+	void *pxp;
+#endif
+};
+
+/**
+ * struct xe_file - file handle for XE driver
+ */
+struct xe_file {
+	/** @xe: xe DEVICE **/
+	struct xe_device *xe;
+
+	/** @drm: base DRM file */
+	struct drm_file *drm;
+
+	/** @vm: VM state for file */
+	struct {
+		/** @xe: xarray to store VMs */
+		struct xarray xa;
+		/** @lock: protects file VM state */
+		struct mutex lock;
+	} vm;
+
+	/** @exec_queue: Submission exec queue state for file */
+	struct {
+		/** @xe: xarray to store engines */
+		struct xarray xa;
+		/** @lock: protects file engine state */
+		struct mutex lock;
+	} exec_queue;
+
+	/** @client: drm client */
+	struct xe_drm_client *client;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c
new file mode 100644
index 000000000000..74391d9b11ae
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_display.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_display.h"
+#include "regs/xe_regs.h"
+
+#include <linux/fb.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "soc/intel_dram.h"
+#include "i915_drv.h"		/* FIXME: HAS_DISPLAY() depends on this */
+#include "intel_acpi.h"
+#include "intel_audio.h"
+#include "intel_bw.h"
+#include "intel_display.h"
+#include "intel_display_driver.h"
+#include "intel_display_irq.h"
+#include "intel_display_types.h"
+#include "intel_dmc.h"
+#include "intel_dp.h"
+#include "intel_fbdev.h"
+#include "intel_hdcp.h"
+#include "intel_hotplug.h"
+#include "intel_opregion.h"
+#include "xe_module.h"
+
+/* Xe device functions */
+
+static bool has_display(struct xe_device *xe)
+{
+	return HAS_DISPLAY(xe);
+}
+
+/**
+ * xe_display_driver_probe_defer - Detect if we need to wait for other drivers
+ *				   early on
+ * @pdev: PCI device
+ *
+ * Returns: true if probe needs to be deferred, false otherwise
+ */
+bool xe_display_driver_probe_defer(struct pci_dev *pdev)
+{
+	if (!xe_modparam.enable_display)
+		return 0;
+
+	return intel_display_driver_probe_defer(pdev);
+}
+
+static void xe_display_last_close(struct drm_device *dev)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (xe->info.enable_display)
+		intel_fbdev_restore_mode(to_xe_device(dev));
+}
+
+/**
+ * xe_display_driver_set_hooks - Add driver flags and hooks for display
+ * @driver: DRM device driver
+ *
+ * Set features and function hooks in @driver that are needed for driving the
+ * display IP. This sets the driver's capability of driving display, regardless
+ * if the device has it enabled
+ */
+void xe_display_driver_set_hooks(struct drm_driver *driver)
+{
+	if (!xe_modparam.enable_display)
+		return;
+
+	driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC;
+	driver->lastclose = xe_display_last_close;
+}
+
+static void unset_display_features(struct xe_device *xe)
+{
+	xe->drm.driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC);
+}
+
+static void display_destroy(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	destroy_workqueue(xe->display.hotplug.dp_wq);
+}
+
+/**
+ * xe_display_create - create display struct
+ * @xe: XE device instance
+ *
+ * Initialize all fields used by the display part.
+ *
+ * TODO: once everything can be inside a single struct, make the struct opaque
+ * to the rest of xe and return it to be xe->display.
+ *
+ * Returns: 0 on success
+ */
+int xe_display_create(struct xe_device *xe)
+{
+	int err;
+
+	spin_lock_init(&xe->display.fb_tracking.lock);
+
+	xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
+
+	drmm_mutex_init(&xe->drm, &xe->sb_lock);
+	drmm_mutex_init(&xe->drm, &xe->display.backlight.lock);
+	drmm_mutex_init(&xe->drm, &xe->display.audio.mutex);
+	drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex);
+	drmm_mutex_init(&xe->drm, &xe->display.pps.mutex);
+	drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex);
+	xe->enabled_irq_mask = ~0;
+
+	err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (!xe->info.enable_display)
+		return;
+
+	intel_power_domains_cleanup(xe);
+}
+
+int xe_display_init_nommio(struct xe_device *xe)
+{
+	int err;
+
+	if (!xe->info.enable_display)
+		return 0;
+
+	/* Fake uncore lock */
+	spin_lock_init(&xe->uncore.lock);
+
+	/* This must be called before any calls to HAS_PCH_* */
+	intel_detect_pch(xe);
+
+	err = intel_power_domains_init(xe);
+	if (err)
+		return err;
+
+	return drmm_add_action_or_reset(&xe->drm, xe_display_fini_nommio, xe);
+}
+
+static void xe_display_fini_noirq(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_remove_noirq(xe);
+	intel_power_domains_driver_remove(xe);
+}
+
+int xe_display_init_noirq(struct xe_device *xe)
+{
+	int err;
+
+	if (!xe->info.enable_display)
+		return 0;
+
+	intel_display_driver_early_probe(xe);
+
+	/* Early display init.. */
+	intel_opregion_setup(xe);
+
+	/*
+	 * Fill the dram structure to get the system dram info. This will be
+	 * used for memory latency calculation.
+	 */
+	intel_dram_detect(xe);
+
+	intel_bw_init_hw(xe);
+
+	intel_display_device_info_runtime_init(xe);
+
+	err = intel_display_driver_probe_noirq(xe);
+	if (err)
+		return err;
+
+	return drmm_add_action_or_reset(&xe->drm, xe_display_fini_noirq, NULL);
+}
+
+static void xe_display_fini_noaccel(struct drm_device *dev, void *dummy)
+{
+	struct xe_device *xe = to_xe_device(dev);
+
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_remove_nogem(xe);
+}
+
+int xe_display_init_noaccel(struct xe_device *xe)
+{
+	int err;
+
+	if (!xe->info.enable_display)
+		return 0;
+
+	err = intel_display_driver_probe_nogem(xe);
+	if (err)
+		return err;
+
+	return drmm_add_action_or_reset(&xe->drm, xe_display_fini_noaccel, NULL);
+}
+
+int xe_display_init(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return 0;
+
+	return intel_display_driver_probe(xe);
+}
+
+void xe_display_fini(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	/* poll work can call into fbdev, hence clean that up afterwards */
+	intel_hpd_poll_fini(xe);
+	intel_fbdev_fini(xe);
+
+	intel_hdcp_component_fini(xe);
+	intel_audio_deinit(xe);
+}
+
+void xe_display_register(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_register(xe);
+	intel_register_dsm_handler();
+	intel_power_domains_enable(xe);
+}
+
+void xe_display_unregister(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_unregister_dsm_handler();
+	intel_power_domains_disable(xe);
+	intel_display_driver_unregister(xe);
+}
+
+void xe_display_driver_remove(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_driver_remove(xe);
+
+	intel_display_device_remove(xe);
+}
+
+/* IRQ-related functions */
+
+void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	if (master_ctl & DISPLAY_IRQ)
+		gen11_display_irq_handler(xe);
+}
+
+void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	if (gu_misc_iir & GU_MISC_GSE)
+		intel_opregion_asle_intr(xe);
+}
+
+void xe_display_irq_reset(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	gen11_display_irq_reset(xe);
+}
+
+void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	if (gt->info.id == XE_GT0)
+		gen11_de_irq_postinstall(xe);
+}
+
+static void intel_suspend_encoders(struct xe_device *xe)
+{
+	struct drm_device *dev = &xe->drm;
+	struct intel_encoder *encoder;
+
+	if (has_display(xe))
+		return;
+
+	drm_modeset_lock_all(dev);
+	for_each_intel_encoder(dev, encoder)
+		if (encoder->suspend)
+			encoder->suspend(encoder);
+	drm_modeset_unlock_all(dev);
+}
+
+static bool suspend_to_idle(void)
+{
+#if IS_ENABLED(CONFIG_ACPI_SLEEP)
+	if (acpi_target_system_state() < ACPI_STATE_S3)
+		return true;
+#endif
+	return false;
+}
+
+void xe_display_pm_suspend(struct xe_device *xe)
+{
+	bool s2idle = suspend_to_idle();
+	if (!xe->info.enable_display)
+		return;
+
+	/*
+	 * We do a lot of poking in a lot of registers, make sure they work
+	 * properly.
+	 */
+	intel_power_domains_disable(xe);
+	if (has_display(xe))
+		drm_kms_helper_poll_disable(&xe->drm);
+
+	intel_display_driver_suspend(xe);
+
+	intel_dp_mst_suspend(xe);
+
+	intel_hpd_cancel_work(xe);
+
+	intel_suspend_encoders(xe);
+
+	intel_opregion_suspend(xe, s2idle ? PCI_D1 : PCI_D3cold);
+
+	intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true);
+
+	intel_dmc_suspend(xe);
+}
+
+void xe_display_pm_suspend_late(struct xe_device *xe)
+{
+	bool s2idle = suspend_to_idle();
+	if (!xe->info.enable_display)
+		return;
+
+	intel_power_domains_suspend(xe, s2idle);
+
+	intel_display_power_suspend_late(xe);
+}
+
+void xe_display_pm_resume_early(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_display_power_resume_early(xe);
+
+	intel_power_domains_resume(xe);
+}
+
+void xe_display_pm_resume(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		return;
+
+	intel_dmc_resume(xe);
+
+	if (has_display(xe))
+		drm_mode_config_reset(&xe->drm);
+
+	intel_display_driver_init_hw(xe);
+	intel_hpd_init(xe);
+
+	/* MST sideband requires HPD interrupts enabled */
+	intel_dp_mst_resume(xe);
+	intel_display_driver_resume(xe);
+
+	intel_hpd_poll_disable(xe);
+	if (has_display(xe))
+		drm_kms_helper_poll_enable(&xe->drm);
+
+	intel_opregion_resume(xe);
+
+	intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
+
+	intel_power_domains_enable(xe);
+}
+
+void xe_display_probe(struct xe_device *xe)
+{
+	if (!xe->info.enable_display)
+		goto no_display;
+
+	intel_display_device_probe(xe);
+
+	if (has_display(xe))
+		return;
+
+no_display:
+	xe->info.enable_display = false;
+	unset_display_features(xe);
+}
diff --git a/drivers/gpu/drm/xe/xe_display.h b/drivers/gpu/drm/xe/xe_display.h
new file mode 100644
index 000000000000..710e56180b52
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_display.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DISPLAY_H_
+#define _XE_DISPLAY_H_
+
+#include "xe_device.h"
+
+struct drm_driver;
+
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+
+bool xe_display_driver_probe_defer(struct pci_dev *pdev);
+void xe_display_driver_set_hooks(struct drm_driver *driver);
+void xe_display_driver_remove(struct xe_device *xe);
+
+int xe_display_create(struct xe_device *xe);
+
+void xe_display_probe(struct xe_device *xe);
+
+int xe_display_init_nommio(struct xe_device *xe);
+int xe_display_init_noirq(struct xe_device *xe);
+int xe_display_init_noaccel(struct xe_device *xe);
+int xe_display_init(struct xe_device *xe);
+void xe_display_fini(struct xe_device *xe);
+
+void xe_display_register(struct xe_device *xe);
+void xe_display_unregister(struct xe_device *xe);
+
+void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl);
+void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir);
+void xe_display_irq_reset(struct xe_device *xe);
+void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);
+
+void xe_display_pm_suspend(struct xe_device *xe);
+void xe_display_pm_suspend_late(struct xe_device *xe);
+void xe_display_pm_resume_early(struct xe_device *xe);
+void xe_display_pm_resume(struct xe_device *xe);
+
+#else
+
+static inline int xe_display_driver_probe_defer(struct pci_dev *pdev) { return 0; }
+static inline void xe_display_driver_set_hooks(struct drm_driver *driver) { }
+static inline void xe_display_driver_remove(struct xe_device *xe) {}
+
+static inline int xe_display_create(struct xe_device *xe) { return 0; }
+
+static inline void xe_display_probe(struct xe_device *xe) { }
+
+static inline int xe_display_init_nommio(struct xe_device *xe) { return 0; }
+static inline int xe_display_init_noirq(struct xe_device *xe) { return 0; }
+static inline int xe_display_init_noaccel(struct xe_device *xe) { return 0; }
+static inline int xe_display_init(struct xe_device *xe) { return 0; }
+static inline void xe_display_fini(struct xe_device *xe) {}
+
+static inline void xe_display_register(struct xe_device *xe) {}
+static inline void xe_display_unregister(struct xe_device *xe) {}
+
+static inline void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) {}
+static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) {}
+static inline void xe_display_irq_reset(struct xe_device *xe) {}
+static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}
+
+static inline void xe_display_pm_suspend(struct xe_device *xe) {}
+static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
+static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
+static inline void xe_display_pm_resume(struct xe_device *xe) {}
+
+#endif /* CONFIG_DRM_XE_DISPLAY */
+#endif /* _XE_DISPLAY_H_ */
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
new file mode 100644
index 000000000000..64ed303728fd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_dma_buf.h"
+
+#include <kunit/test.h>
+#include <linux/dma-buf.h>
+#include <linux/pci-p2pdma.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_prime.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "tests/xe_test.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_vm.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+
+static int xe_dma_buf_attach(struct dma_buf *dmabuf,
+			     struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+
+	if (attach->peer2peer &&
+	    pci_p2pdma_distance(to_pci_dev(obj->dev->dev), attach->dev, false) < 0)
+		attach->peer2peer = false;
+
+	if (!attach->peer2peer && !xe_bo_can_migrate(gem_to_xe_bo(obj), XE_PL_TT))
+		return -EOPNOTSUPP;
+
+	xe_device_mem_access_get(to_xe_device(obj->dev));
+	return 0;
+}
+
+static void xe_dma_buf_detach(struct dma_buf *dmabuf,
+			      struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+
+	xe_device_mem_access_put(to_xe_device(obj->dev));
+}
+
+static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+	struct xe_device *xe = xe_bo_device(bo);
+	int ret;
+
+	/*
+	 * For now only support pinning in TT memory, for two reasons:
+	 * 1) Avoid pinning in a placement not accessible to some importers.
+	 * 2) Pinning in VRAM requires PIN accounting which is a to-do.
+	 */
+	if (xe_bo_is_pinned(bo) && bo->ttm.resource->placement != XE_PL_TT) {
+		drm_dbg(&xe->drm, "Can't migrate pinned bo for dma-buf pin.\n");
+		return -EINVAL;
+	}
+
+	ret = xe_bo_migrate(bo, XE_PL_TT);
+	if (ret) {
+		if (ret != -EINTR && ret != -ERESTARTSYS)
+			drm_dbg(&xe->drm,
+				"Failed migrating dma-buf to TT memory: %pe\n",
+				ERR_PTR(ret));
+		return ret;
+	}
+
+	ret = xe_bo_pin_external(bo);
+	xe_assert(xe, !ret);
+
+	return 0;
+}
+
+static void xe_dma_buf_unpin(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+
+	xe_bo_unpin_external(bo);
+}
+
+static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
+				       enum dma_data_direction dir)
+{
+	struct dma_buf *dma_buf = attach->dmabuf;
+	struct drm_gem_object *obj = dma_buf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+	struct sg_table *sgt;
+	int r = 0;
+
+	if (!attach->peer2peer && !xe_bo_can_migrate(bo, XE_PL_TT))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (!xe_bo_is_pinned(bo)) {
+		if (!attach->peer2peer)
+			r = xe_bo_migrate(bo, XE_PL_TT);
+		else
+			r = xe_bo_validate(bo, NULL, false);
+		if (r)
+			return ERR_PTR(r);
+	}
+
+	switch (bo->ttm.resource->mem_type) {
+	case XE_PL_TT:
+		sgt = drm_prime_pages_to_sg(obj->dev,
+					    bo->ttm.ttm->pages,
+					    bo->ttm.ttm->num_pages);
+		if (IS_ERR(sgt))
+			return sgt;
+
+		if (dma_map_sgtable(attach->dev, sgt, dir,
+				    DMA_ATTR_SKIP_CPU_SYNC))
+			goto error_free;
+		break;
+
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+		r = xe_ttm_vram_mgr_alloc_sgt(xe_bo_device(bo),
+					      bo->ttm.resource, 0,
+					      bo->ttm.base.size, attach->dev,
+					      dir, &sgt);
+		if (r)
+			return ERR_PTR(r);
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	return sgt;
+
+error_free:
+	sg_free_table(sgt);
+	kfree(sgt);
+	return ERR_PTR(-EBUSY);
+}
+
+static void xe_dma_buf_unmap(struct dma_buf_attachment *attach,
+			     struct sg_table *sgt,
+			     enum dma_data_direction dir)
+{
+	struct dma_buf *dma_buf = attach->dmabuf;
+	struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv);
+
+	if (!xe_bo_is_vram(bo)) {
+		dma_unmap_sgtable(attach->dev, sgt, dir, 0);
+		sg_free_table(sgt);
+		kfree(sgt);
+	} else {
+		xe_ttm_vram_mgr_free_sgt(attach->dev, dir, sgt);
+	}
+}
+
+static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
+				       enum dma_data_direction direction)
+{
+	struct drm_gem_object *obj = dma_buf->priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+	bool reads =  (direction == DMA_BIDIRECTIONAL ||
+		       direction == DMA_FROM_DEVICE);
+
+	if (!reads)
+		return 0;
+
+	/* Can we do interruptible lock here? */
+	xe_bo_lock(bo, false);
+	(void)xe_bo_migrate(bo, XE_PL_TT);
+	xe_bo_unlock(bo);
+
+	return 0;
+}
+
+const struct dma_buf_ops xe_dmabuf_ops = {
+	.attach = xe_dma_buf_attach,
+	.detach = xe_dma_buf_detach,
+	.pin = xe_dma_buf_pin,
+	.unpin = xe_dma_buf_unpin,
+	.map_dma_buf = xe_dma_buf_map,
+	.unmap_dma_buf = xe_dma_buf_unmap,
+	.release = drm_gem_dmabuf_release,
+	.begin_cpu_access = xe_dma_buf_begin_cpu_access,
+	.mmap = drm_gem_dmabuf_mmap,
+	.vmap = drm_gem_dmabuf_vmap,
+	.vunmap = drm_gem_dmabuf_vunmap,
+};
+
+struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags)
+{
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+	struct dma_buf *buf;
+
+	if (bo->vm)
+		return ERR_PTR(-EPERM);
+
+	buf = drm_gem_prime_export(obj, flags);
+	if (!IS_ERR(buf))
+		buf->ops = &xe_dmabuf_ops;
+
+	return buf;
+}
+
+static struct drm_gem_object *
+xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
+		    struct dma_buf *dma_buf)
+{
+	struct dma_resv *resv = dma_buf->resv;
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_bo *bo;
+	int ret;
+
+	dma_resv_lock(resv, NULL);
+	bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
+				    0, /* Will require 1way or 2way for vm_bind */
+				    ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
+	if (IS_ERR(bo)) {
+		ret = PTR_ERR(bo);
+		goto error;
+	}
+	dma_resv_unlock(resv);
+
+	return &bo->ttm.base;
+
+error:
+	dma_resv_unlock(resv);
+	return ERR_PTR(ret);
+}
+
+static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->importer_priv;
+	struct xe_bo *bo = gem_to_xe_bo(obj);
+
+	XE_WARN_ON(xe_bo_evict(bo, false));
+}
+
+static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = {
+	.allow_peer2peer = true,
+	.move_notify = xe_dma_buf_move_notify
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+
+struct dma_buf_test_params {
+	struct xe_test_priv base;
+	const struct dma_buf_attach_ops *attach_ops;
+	bool force_different_devices;
+	u32 mem_mask;
+};
+
+#define to_dma_buf_test_params(_priv) \
+	container_of(_priv, struct dma_buf_test_params, base)
+#endif
+
+struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
+					   struct dma_buf *dma_buf)
+{
+	XE_TEST_DECLARE(struct dma_buf_test_params *test =
+			to_dma_buf_test_params
+			(xe_cur_kunit_priv(XE_TEST_LIVE_DMA_BUF));)
+	const struct dma_buf_attach_ops *attach_ops;
+	struct dma_buf_attachment *attach;
+	struct drm_gem_object *obj;
+	struct xe_bo *bo;
+
+	if (dma_buf->ops == &xe_dmabuf_ops) {
+		obj = dma_buf->priv;
+		if (obj->dev == dev &&
+		    !XE_TEST_ONLY(test && test->force_different_devices)) {
+			/*
+			 * Importing dmabuf exported from out own gem increases
+			 * refcount on gem itself instead of f_count of dmabuf.
+			 */
+			drm_gem_object_get(obj);
+			return obj;
+		}
+	}
+
+	/*
+	 * Don't publish the bo until we have a valid attachment, and a
+	 * valid attachment needs the bo address. So pre-create a bo before
+	 * creating the attachment and publish.
+	 */
+	bo = xe_bo_alloc();
+	if (IS_ERR(bo))
+		return ERR_CAST(bo);
+
+	attach_ops = &xe_dma_buf_attach_ops;
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+	if (test)
+		attach_ops = test->attach_ops;
+#endif
+
+	attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base);
+	if (IS_ERR(attach)) {
+		obj = ERR_CAST(attach);
+		goto out_err;
+	}
+
+	/* Errors here will take care of freeing the bo. */
+	obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
+	if (IS_ERR(obj))
+		return obj;
+
+
+	get_dma_buf(dma_buf);
+	obj->import_attach = attach;
+	return obj;
+
+out_err:
+	xe_bo_free(bo);
+
+	return obj;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_dma_buf.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.h b/drivers/gpu/drm/xe/xe_dma_buf.h
new file mode 100644
index 000000000000..861dd28a862c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dma_buf.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DMA_BUF_H_
+#define _XE_DMA_BUF_H_
+
+#include <drm/drm_gem.h>
+
+struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags);
+struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
+					   struct dma_buf *dma_buf);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
new file mode 100644
index 000000000000..82d1305e831f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+#include <drm/xe_drm.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "xe_bo.h"
+#include "xe_bo_types.h"
+#include "xe_device_types.h"
+#include "xe_drm_client.h"
+#include "xe_trace.h"
+
+/**
+ * xe_drm_client_alloc() - Allocate drm client
+ * @void: No arg
+ *
+ * Allocate drm client struct to track client memory against
+ * same till client life. Call this API whenever new client
+ * has opened xe device.
+ *
+ * Return: pointer to client struct or NULL if can't allocate
+ */
+struct xe_drm_client *xe_drm_client_alloc(void)
+{
+	struct xe_drm_client *client;
+
+	client = kzalloc(sizeof(*client), GFP_KERNEL);
+	if (!client)
+		return NULL;
+
+	kref_init(&client->kref);
+
+#ifdef CONFIG_PROC_FS
+	spin_lock_init(&client->bos_lock);
+	INIT_LIST_HEAD(&client->bos_list);
+#endif
+	return client;
+}
+
+/**
+ * __xe_drm_client_free() - Free client struct
+ * @kref: The reference
+ *
+ * This frees client struct. Call this API when xe device is closed
+ * by drm client.
+ *
+ * Return: void
+ */
+void __xe_drm_client_free(struct kref *kref)
+{
+	struct xe_drm_client *client =
+		container_of(kref, typeof(*client), kref);
+
+	kfree(client);
+}
+
+#ifdef CONFIG_PROC_FS
+/**
+ * xe_drm_client_add_bo() - Add BO for tracking client mem usage
+ * @client: The drm client ptr
+ * @bo: The xe BO ptr
+ *
+ * Add all BO created by individual drm client by calling this function.
+ * This helps in tracking client memory usage.
+ *
+ * Return: void
+ */
+void xe_drm_client_add_bo(struct xe_drm_client *client,
+			  struct xe_bo *bo)
+{
+	XE_WARN_ON(bo->client);
+	XE_WARN_ON(!list_empty(&bo->client_link));
+
+	spin_lock(&client->bos_lock);
+	bo->client = xe_drm_client_get(client);
+	list_add_tail_rcu(&bo->client_link, &client->bos_list);
+	spin_unlock(&client->bos_lock);
+}
+
+/**
+ * xe_drm_client_remove_bo() - Remove BO for tracking client mem usage
+ * @bo: The xe BO ptr
+ *
+ * Remove all BO removed by individual drm client by calling this function.
+ * This helps in tracking client memory usage.
+ *
+ * Return: void
+ */
+void xe_drm_client_remove_bo(struct xe_bo *bo)
+{
+	struct xe_drm_client *client = bo->client;
+
+	spin_lock(&client->bos_lock);
+	list_del_rcu(&bo->client_link);
+	spin_unlock(&client->bos_lock);
+
+	xe_drm_client_put(client);
+}
+
+static void bo_meminfo(struct xe_bo *bo,
+		       struct drm_memory_stats stats[TTM_NUM_MEM_TYPES])
+{
+	u64 sz = bo->size;
+	u32 mem_type;
+
+	if (bo->placement.placement)
+		mem_type = bo->placement.placement->mem_type;
+	else
+		mem_type = XE_PL_TT;
+
+	if (bo->ttm.base.handle_count > 1)
+		stats[mem_type].shared += sz;
+	else
+		stats[mem_type].private += sz;
+
+	if (xe_bo_has_pages(bo)) {
+		stats[mem_type].resident += sz;
+
+		if (!dma_resv_test_signaled(bo->ttm.base.resv,
+					    DMA_RESV_USAGE_BOOKKEEP))
+			stats[mem_type].active += sz;
+		else if (mem_type == XE_PL_SYSTEM)
+			stats[mem_type].purgeable += sz;
+	}
+}
+
+static void show_meminfo(struct drm_printer *p, struct drm_file *file)
+{
+	static const char *const mem_type_to_name[TTM_NUM_MEM_TYPES]  = {
+		[XE_PL_SYSTEM] = "system",
+		[XE_PL_TT] = "gtt",
+		[XE_PL_VRAM0] = "vram0",
+		[XE_PL_VRAM1] = "vram1",
+		[4 ... 6] = NULL,
+		[XE_PL_STOLEN] = "stolen"
+	};
+	struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {};
+	struct xe_file *xef = file->driver_priv;
+	struct ttm_device *bdev = &xef->xe->ttm;
+	struct ttm_resource_manager *man;
+	struct xe_drm_client *client;
+	struct drm_gem_object *obj;
+	struct xe_bo *bo;
+	unsigned int id;
+	u32 mem_type;
+
+	client = xef->client;
+
+	/* Public objects. */
+	spin_lock(&file->table_lock);
+	idr_for_each_entry(&file->object_idr, obj, id) {
+		struct xe_bo *bo = gem_to_xe_bo(obj);
+
+		bo_meminfo(bo, stats);
+	}
+	spin_unlock(&file->table_lock);
+
+	/* Internal objects. */
+	spin_lock(&client->bos_lock);
+	list_for_each_entry_rcu(bo, &client->bos_list, client_link) {
+		if (!bo || !kref_get_unless_zero(&bo->ttm.base.refcount))
+			continue;
+		bo_meminfo(bo, stats);
+		xe_bo_put(bo);
+	}
+	spin_unlock(&client->bos_lock);
+
+	for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) {
+		if (!mem_type_to_name[mem_type])
+			continue;
+
+		man = ttm_manager_type(bdev, mem_type);
+
+		if (man) {
+			drm_print_memory_stats(p,
+					       &stats[mem_type],
+					       DRM_GEM_OBJECT_RESIDENT |
+					       (mem_type != XE_PL_SYSTEM ? 0 :
+					       DRM_GEM_OBJECT_PURGEABLE),
+					       mem_type_to_name[mem_type]);
+		}
+	}
+}
+
+/**
+ * xe_drm_client_fdinfo() - Callback for fdinfo interface
+ * @p: The drm_printer ptr
+ * @file: The drm_file ptr
+ *
+ * This is callabck for drm fdinfo interface. Register this callback
+ * in drm driver ops for show_fdinfo.
+ *
+ * Return: void
+ */
+void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+	show_meminfo(p, file);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drm_client.h b/drivers/gpu/drm/xe/xe_drm_client.h
new file mode 100644
index 000000000000..a9649aa36011
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drm_client.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_DRM_CLIENT_H_
+#define _XE_DRM_CLIENT_H_
+
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/pid.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+struct drm_file;
+struct drm_printer;
+struct xe_bo;
+
+struct xe_drm_client {
+	struct kref kref;
+	unsigned int id;
+#ifdef CONFIG_PROC_FS
+	/**
+	 * @bos_lock: lock protecting @bos_list
+	 */
+	spinlock_t bos_lock;
+	/**
+	 * @bos_list: list of bos created by this client
+	 *
+	 * Protected by @bos_lock.
+	 */
+	struct list_head bos_list;
+#endif
+};
+
+	static inline struct xe_drm_client *
+xe_drm_client_get(struct xe_drm_client *client)
+{
+	kref_get(&client->kref);
+	return client;
+}
+
+void __xe_drm_client_free(struct kref *kref);
+
+static inline void xe_drm_client_put(struct xe_drm_client *client)
+{
+	kref_put(&client->kref, __xe_drm_client_free);
+}
+
+struct xe_drm_client *xe_drm_client_alloc(void);
+static inline struct xe_drm_client *
+xe_drm_client_get(struct xe_drm_client *client);
+static inline void xe_drm_client_put(struct xe_drm_client *client);
+#ifdef CONFIG_PROC_FS
+void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
+void xe_drm_client_add_bo(struct xe_drm_client *client,
+			  struct xe_bo *bo);
+void xe_drm_client_remove_bo(struct xe_bo *bo);
+#else
+static inline void xe_drm_client_add_bo(struct xe_drm_client *client,
+					struct xe_bo *bo)
+{
+}
+
+static inline void xe_drm_client_remove_bo(struct xe_bo *bo)
+{
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h
new file mode 100644
index 000000000000..d45b71426cc8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drv.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_DRV_H_
+#define _XE_DRV_H_
+
+#include <drm/drm_drv.h>
+
+#define DRIVER_NAME		"xe"
+#define DRIVER_DESC		"Intel Xe Graphics"
+#define DRIVER_DATE		"20201103"
+
+/* Interface history:
+ *
+ * 1.1: Original.
+ */
+#define DRIVER_MAJOR		1
+#define DRIVER_MINOR		1
+#define DRIVER_PATCHLEVEL	0
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
new file mode 100644
index 000000000000..d30c0d0689bc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_exec.h"
+
+#include <drm/drm_device.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+#include <linux/delay.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_macros.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
+#include "xe_vm.h"
+
+/**
+ * DOC: Execbuf (User GPU command submission)
+ *
+ * Execs have historically been rather complicated in DRM drivers (at least in
+ * the i915) because a few things:
+ *
+ * - Passing in a list BO which are read / written to creating implicit syncs
+ * - Binding at exec time
+ * - Flow controlling the ring at exec time
+ *
+ * In XE we avoid all of this complication by not allowing a BO list to be
+ * passed into an exec, using the dma-buf implicit sync uAPI, have binds as
+ * seperate operations, and using the DRM scheduler to flow control the ring.
+ * Let's deep dive on each of these.
+ *
+ * We can get away from a BO list by forcing the user to use in / out fences on
+ * every exec rather than the kernel tracking dependencies of BO (e.g. if the
+ * user knows an exec writes to a BO and reads from the BO in the next exec, it
+ * is the user's responsibility to pass in / out fence between the two execs).
+ *
+ * Implicit dependencies for external BOs are handled by using the dma-buf
+ * implicit dependency uAPI (TODO: add link). To make this works each exec must
+ * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external
+ * BO mapped in the VM.
+ *
+ * We do not allow a user to trigger a bind at exec time rather we have a VM
+ * bind IOCTL which uses the same in / out fence interface as exec. In that
+ * sense, a VM bind is basically the same operation as an exec from the user
+ * perspective. e.g. If an exec depends on a VM bind use the in / out fence
+ * interface (struct drm_xe_sync) to synchronize like syncing between two
+ * dependent execs.
+ *
+ * Although a user cannot trigger a bind, we still have to rebind userptrs in
+ * the VM that have been invalidated since the last exec, likewise we also have
+ * to rebind BOs that have been evicted by the kernel. We schedule these rebinds
+ * behind any pending kernel operations on any external BOs in VM or any BOs
+ * private to the VM. This is accomplished by the rebinds waiting on BOs
+ * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs
+ * slots (inflight execs are in the DMA_RESV_USAGE_BOOKING for private BOs and
+ * in DMA_RESV_USAGE_WRITE for external BOs).
+ *
+ * Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute
+ * mode VMs we use preempt fences and a rebind worker (TODO: add link).
+ *
+ * There is no need to flow control the ring in the exec as we write the ring at
+ * submission time and set the DRM scheduler max job limit SIZE_OF_RING /
+ * MAX_JOB_SIZE. The DRM scheduler will then hold all jobs until space in the
+ * ring is available.
+ *
+ * All of this results in a rather simple exec implementation.
+ *
+ * Flow
+ * ~~~~
+ *
+ * .. code-block::
+ *
+ *	Parse input arguments
+ *	Wait for any async VM bind passed as in-fences to start
+ *	<----------------------------------------------------------------------|
+ *	Lock global VM lock in read mode                                       |
+ *	Pin userptrs (also finds userptr invalidated since last exec)          |
+ *	Lock exec (VM dma-resv lock, external BOs dma-resv locks)              |
+ *	Validate BOs that have been evicted                                    |
+ *	Create job                                                             |
+ *	Rebind invalidated userptrs + evicted BOs (non-compute-mode)           |
+ *	Add rebind fence dependency to job                                     |
+ *	Add job VM dma-resv bookkeeping slot (non-compute mode)                |
+ *	Add job to external BOs dma-resv write slots (non-compute mode)        |
+ *	Check if any userptrs invalidated since pin ------ Drop locks ---------|
+ *	Install in / out fences for job
+ *	Submit job
+ *	Unlock all
+ */
+
+static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
+{
+	return drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec);
+}
+
+int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_exec *args = data;
+	struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs);
+	u64 __user *addresses_user = u64_to_user_ptr(args->address);
+	struct xe_exec_queue *q;
+	struct xe_sync_entry *syncs = NULL;
+	u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
+	struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn};
+	struct drm_exec *exec = &vm_exec.exec;
+	u32 i, num_syncs = 0;
+	struct xe_sched_job *job;
+	struct dma_fence *rebind_fence;
+	struct xe_vm *vm;
+	bool write_locked;
+	ktime_t end = 0;
+	int err = 0;
+
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+	if (XE_IOCTL_DBG(xe, !q))
+		return -ENOENT;
+
+	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->num_batch_buffer &&
+			 q->width != args->num_batch_buffer))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) {
+		err = -ECANCELED;
+		goto err_exec_queue;
+	}
+
+	if (args->num_syncs) {
+		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
+		if (!syncs) {
+			err = -ENOMEM;
+			goto err_exec_queue;
+		}
+	}
+
+	vm = q->vm;
+
+	for (i = 0; i < args->num_syncs; i++) {
+		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
+					  &syncs_user[i], SYNC_PARSE_FLAG_EXEC |
+					  (xe_vm_in_lr_mode(vm) ?
+					   SYNC_PARSE_FLAG_LR_MODE : 0));
+		if (err)
+			goto err_syncs;
+	}
+
+	if (xe_exec_queue_is_parallel(q)) {
+		err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
+				       q->width);
+		if (err) {
+			err = -EFAULT;
+			goto err_syncs;
+		}
+	}
+
+retry:
+	if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) {
+		err = down_write_killable(&vm->lock);
+		write_locked = true;
+	} else {
+		/* We don't allow execs while the VM is in error state */
+		err = down_read_interruptible(&vm->lock);
+		write_locked = false;
+	}
+	if (err)
+		goto err_syncs;
+
+	if (write_locked) {
+		err = xe_vm_userptr_pin(vm);
+		downgrade_write(&vm->lock);
+		write_locked = false;
+		if (err)
+			goto err_unlock_list;
+	}
+
+	vm_exec.vm = &vm->gpuvm;
+	vm_exec.num_fences = 1 + vm->xe->info.tile_count;
+	vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
+	if (xe_vm_in_lr_mode(vm)) {
+		drm_exec_init(exec, vm_exec.flags, 0);
+	} else {
+		err = drm_gpuvm_exec_lock(&vm_exec);
+		if (err) {
+			if (xe_vm_validate_should_retry(exec, err, &end))
+				err = -EAGAIN;
+			goto err_unlock_list;
+		}
+	}
+
+	if (xe_vm_is_closed_or_banned(q->vm)) {
+		drm_warn(&xe->drm, "Trying to schedule after vm is closed or banned\n");
+		err = -ECANCELED;
+		goto err_exec;
+	}
+
+	if (!args->num_batch_buffer) {
+		if (!xe_vm_in_lr_mode(vm)) {
+			struct dma_fence *fence;
+
+			fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm);
+			if (IS_ERR(fence)) {
+				err = PTR_ERR(fence);
+				goto err_exec;
+			}
+			for (i = 0; i < num_syncs; i++)
+				xe_sync_entry_signal(&syncs[i], NULL, fence);
+			xe_exec_queue_last_fence_set(q, vm, fence);
+			dma_fence_put(fence);
+		}
+
+		goto err_exec;
+	}
+
+	if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
+		err = -EWOULDBLOCK;
+		goto err_exec;
+	}
+
+	job = xe_sched_job_create(q, xe_exec_queue_is_parallel(q) ?
+				  addresses : &args->address);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto err_exec;
+	}
+
+	/*
+	 * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
+	 * VM mode only.
+	 */
+	rebind_fence = xe_vm_rebind(vm, false);
+	if (IS_ERR(rebind_fence)) {
+		err = PTR_ERR(rebind_fence);
+		goto err_put_job;
+	}
+
+	/*
+	 * We store the rebind_fence in the VM so subsequent execs don't get
+	 * scheduled before the rebinds of userptrs / evicted BOs is complete.
+	 */
+	if (rebind_fence) {
+		dma_fence_put(vm->rebind_fence);
+		vm->rebind_fence = rebind_fence;
+	}
+	if (vm->rebind_fence) {
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+			     &vm->rebind_fence->flags)) {
+			dma_fence_put(vm->rebind_fence);
+			vm->rebind_fence = NULL;
+		} else {
+			dma_fence_get(vm->rebind_fence);
+			err = drm_sched_job_add_dependency(&job->drm,
+							   vm->rebind_fence);
+			if (err)
+				goto err_put_job;
+		}
+	}
+
+	/* Wait behind munmap style rebinds */
+	if (!xe_vm_in_lr_mode(vm)) {
+		err = drm_sched_job_add_resv_dependencies(&job->drm,
+							  xe_vm_resv(vm),
+							  DMA_RESV_USAGE_KERNEL);
+		if (err)
+			goto err_put_job;
+	}
+
+	for (i = 0; i < num_syncs && !err; i++)
+		err = xe_sync_entry_add_deps(&syncs[i], job);
+	if (err)
+		goto err_put_job;
+
+	if (!xe_vm_in_lr_mode(vm)) {
+		err = xe_sched_job_last_fence_add_dep(job, vm);
+		if (err)
+			goto err_put_job;
+
+		err = down_read_interruptible(&vm->userptr.notifier_lock);
+		if (err)
+			goto err_put_job;
+
+		err = __xe_vm_userptr_needs_repin(vm);
+		if (err)
+			goto err_repin;
+	}
+
+	/*
+	 * Point of no return, if we error after this point just set an error on
+	 * the job and let the DRM scheduler / backend clean up the job.
+	 */
+	xe_sched_job_arm(job);
+	if (!xe_vm_in_lr_mode(vm))
+		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished,
+					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
+
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_signal(&syncs[i], job,
+				     &job->drm.s_fence->finished);
+
+	if (xe_exec_queue_is_lr(q))
+		q->ring_ops->emit_job(job);
+	if (!xe_vm_in_lr_mode(vm))
+		xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+	xe_vm_reactivate_rebind(vm);
+
+	if (!err && !xe_vm_in_lr_mode(vm)) {
+		spin_lock(&xe->ttm.lru_lock);
+		ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+		spin_unlock(&xe->ttm.lru_lock);
+	}
+
+err_repin:
+	if (!xe_vm_in_lr_mode(vm))
+		up_read(&vm->userptr.notifier_lock);
+err_put_job:
+	if (err)
+		xe_sched_job_put(job);
+err_exec:
+	drm_exec_fini(exec);
+err_unlock_list:
+	if (write_locked)
+		up_write(&vm->lock);
+	else
+		up_read(&vm->lock);
+	if (err == -EAGAIN)
+		goto retry;
+err_syncs:
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_cleanup(&syncs[i]);
+	kfree(syncs);
+err_exec_queue:
+	xe_exec_queue_put(q);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_exec.h b/drivers/gpu/drm/xe/xe_exec.h
new file mode 100644
index 000000000000..e4932494cea3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_EXEC_H_
+#define _XE_EXEC_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
new file mode 100644
index 000000000000..44fe8097b7cd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -0,0 +1,956 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_exec_queue.h"
+
+#include <linux/nospec.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_class_sysfs.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_migrate.h"
+#include "xe_pm.h"
+#include "xe_ring_ops_types.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+enum xe_exec_queue_sched_prop {
+	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
+	XE_EXEC_QUEUE_TIMESLICE = 1,
+	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,
+	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
+};
+
+static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
+						    struct xe_vm *vm,
+						    u32 logical_mask,
+						    u16 width, struct xe_hw_engine *hwe,
+						    u32 flags)
+{
+	struct xe_exec_queue *q;
+	struct xe_gt *gt = hwe->gt;
+	int err;
+	int i;
+
+	/* only kernel queues can be permanent */
+	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
+
+	q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
+	if (!q)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&q->refcount);
+	q->flags = flags;
+	q->hwe = hwe;
+	q->gt = gt;
+	if (vm)
+		q->vm = xe_vm_get(vm);
+	q->class = hwe->class;
+	q->width = width;
+	q->logical_mask = logical_mask;
+	q->fence_irq = &gt->fence_irq[hwe->class];
+	q->ring_ops = gt->ring_ops[hwe->class];
+	q->ops = gt->exec_queue_ops;
+	INIT_LIST_HEAD(&q->persistent.link);
+	INIT_LIST_HEAD(&q->compute.link);
+	INIT_LIST_HEAD(&q->multi_gt_link);
+
+	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
+	q->sched_props.preempt_timeout_us =
+				hwe->eclass->sched_props.preempt_timeout_us;
+
+	if (xe_exec_queue_is_parallel(q)) {
+		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
+		q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
+	}
+	if (q->flags & EXEC_QUEUE_FLAG_VM) {
+		q->bind.fence_ctx = dma_fence_context_alloc(1);
+		q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO;
+	}
+
+	for (i = 0; i < width; ++i) {
+		err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K);
+		if (err)
+			goto err_lrc;
+	}
+
+	err = q->ops->init(q);
+	if (err)
+		goto err_lrc;
+
+	/*
+	 * Normally the user vm holds an rpm ref to keep the device
+	 * awake, and the context holds a ref for the vm, however for
+	 * some engines we use the kernels migrate vm underneath which offers no
+	 * such rpm ref, or we lack a vm. Make sure we keep a ref here, so we
+	 * can perform GuC CT actions when needed. Caller is expected to have
+	 * already grabbed the rpm ref outside any sensitive locks.
+	 */
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !vm))
+		drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe));
+
+	return q;
+
+err_lrc:
+	for (i = i - 1; i >= 0; --i)
+		xe_lrc_finish(q->lrc + i);
+	kfree(q);
+	return ERR_PTR(err);
+}
+
+struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
+					   u32 logical_mask, u16 width,
+					   struct xe_hw_engine *hwe, u32 flags)
+{
+	struct xe_exec_queue *q;
+	int err;
+
+	if (vm) {
+		err = xe_vm_lock(vm, true);
+		if (err)
+			return ERR_PTR(err);
+	}
+	q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags);
+	if (vm)
+		xe_vm_unlock(vm);
+
+	return q;
+}
+
+struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
+						 struct xe_vm *vm,
+						 enum xe_engine_class class, u32 flags)
+{
+	struct xe_hw_engine *hwe, *hwe0 = NULL;
+	enum xe_hw_engine_id id;
+	u32 logical_mask = 0;
+
+	for_each_hw_engine(hwe, gt, id) {
+		if (xe_hw_engine_is_reserved(hwe))
+			continue;
+
+		if (hwe->class == class) {
+			logical_mask |= BIT(hwe->logical_instance);
+			if (!hwe0)
+				hwe0 = hwe;
+		}
+	}
+
+	if (!logical_mask)
+		return ERR_PTR(-ENODEV);
+
+	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags);
+}
+
+void xe_exec_queue_destroy(struct kref *ref)
+{
+	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
+	struct xe_exec_queue *eq, *next;
+
+	xe_exec_queue_last_fence_put_unlocked(q);
+	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
+		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
+					 multi_gt_link)
+			xe_exec_queue_put(eq);
+	}
+
+	q->ops->fini(q);
+}
+
+void xe_exec_queue_fini(struct xe_exec_queue *q)
+{
+	int i;
+
+	for (i = 0; i < q->width; ++i)
+		xe_lrc_finish(q->lrc + i);
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
+		xe_device_mem_access_put(gt_to_xe(q->gt));
+	if (q->vm)
+		xe_vm_put(q->vm);
+
+	kfree(q);
+}
+
+void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
+{
+	switch (q->class) {
+	case XE_ENGINE_CLASS_RENDER:
+		sprintf(q->name, "rcs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+		sprintf(q->name, "vcs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		sprintf(q->name, "vecs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_COPY:
+		sprintf(q->name, "bcs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_COMPUTE:
+		sprintf(q->name, "ccs%d", instance);
+		break;
+	case XE_ENGINE_CLASS_OTHER:
+		sprintf(q->name, "gsccs%d", instance);
+		break;
+	default:
+		XE_WARN_ON(q->class);
+	}
+}
+
+struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
+{
+	struct xe_exec_queue *q;
+
+	mutex_lock(&xef->exec_queue.lock);
+	q = xa_load(&xef->exec_queue.xa, id);
+	if (q)
+		xe_exec_queue_get(q);
+	mutex_unlock(&xef->exec_queue.lock);
+
+	return q;
+}
+
+enum xe_exec_queue_priority
+xe_exec_queue_device_get_max_priority(struct xe_device *xe)
+{
+	return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH :
+				       XE_EXEC_QUEUE_PRIORITY_NORMAL;
+}
+
+static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
+				   u64 value, bool create)
+{
+	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
+		return -EPERM;
+
+	return q->ops->set_priority(q, value);
+}
+
+static bool xe_exec_queue_enforce_schedule_limit(void)
+{
+#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
+	return true;
+#else
+	return !capable(CAP_SYS_NICE);
+#endif
+}
+
+static void
+xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
+			      enum xe_exec_queue_sched_prop prop,
+			      u32 *min, u32 *max)
+{
+	switch (prop) {
+	case XE_EXEC_QUEUE_JOB_TIMEOUT:
+		*min = eclass->sched_props.job_timeout_min;
+		*max = eclass->sched_props.job_timeout_max;
+		break;
+	case XE_EXEC_QUEUE_TIMESLICE:
+		*min = eclass->sched_props.timeslice_min;
+		*max = eclass->sched_props.timeslice_max;
+		break;
+	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
+		*min = eclass->sched_props.preempt_timeout_min;
+		*max = eclass->sched_props.preempt_timeout_max;
+		break;
+	default:
+		break;
+	}
+#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
+	if (capable(CAP_SYS_NICE)) {
+		switch (prop) {
+		case XE_EXEC_QUEUE_JOB_TIMEOUT:
+			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
+			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
+			break;
+		case XE_EXEC_QUEUE_TIMESLICE:
+			*min = XE_HW_ENGINE_TIMESLICE_MIN;
+			*max = XE_HW_ENGINE_TIMESLICE_MAX;
+			break;
+		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
+			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
+			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
+			break;
+		default:
+			break;
+		}
+	}
+#endif
+}
+
+static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
+				    u64 value, bool create)
+{
+	u32 min = 0, max = 0;
+
+	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
+				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);
+
+	if (xe_exec_queue_enforce_schedule_limit() &&
+	    !xe_hw_engine_timeout_in_range(value, min, max))
+		return -EINVAL;
+
+	return q->ops->set_timeslice(q, value);
+}
+
+static int exec_queue_set_preemption_timeout(struct xe_device *xe,
+					     struct xe_exec_queue *q, u64 value,
+					     bool create)
+{
+	u32 min = 0, max = 0;
+
+	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
+				      XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max);
+
+	if (xe_exec_queue_enforce_schedule_limit() &&
+	    !xe_hw_engine_timeout_in_range(value, min, max))
+		return -EINVAL;
+
+	return q->ops->set_preempt_timeout(q, value);
+}
+
+static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q,
+				      u64 value, bool create)
+{
+	if (XE_IOCTL_DBG(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, xe_vm_in_preempt_fence_mode(q->vm)))
+		return -EINVAL;
+
+	if (value)
+		q->flags |= EXEC_QUEUE_FLAG_PERSISTENT;
+	else
+		q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT;
+
+	return 0;
+}
+
+static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q,
+				      u64 value, bool create)
+{
+	u32 min = 0, max = 0;
+
+	if (XE_IOCTL_DBG(xe, !create))
+		return -EINVAL;
+
+	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
+				      XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max);
+
+	if (xe_exec_queue_enforce_schedule_limit() &&
+	    !xe_hw_engine_timeout_in_range(value, min, max))
+		return -EINVAL;
+
+	return q->ops->set_job_timeout(q, value);
+}
+
+static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q,
+				      u64 value, bool create)
+{
+	if (XE_IOCTL_DBG(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
+		return -EINVAL;
+
+	q->usm.acc_trigger = value;
+
+	return 0;
+}
+
+static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q,
+				     u64 value, bool create)
+{
+	if (XE_IOCTL_DBG(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
+		return -EINVAL;
+
+	q->usm.acc_notify = value;
+
+	return 0;
+}
+
+static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q,
+					  u64 value, bool create)
+{
+	if (XE_IOCTL_DBG(xe, !create))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
+		return -EINVAL;
+
+	if (value > DRM_XE_ACC_GRANULARITY_64M)
+		return -EINVAL;
+
+	q->usm.acc_granularity = value;
+
+	return 0;
+}
+
+typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
+					     struct xe_exec_queue *q,
+					     u64 value, bool create);
+
+static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify,
+	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity,
+};
+
+static int exec_queue_user_ext_set_property(struct xe_device *xe,
+					    struct xe_exec_queue *q,
+					    u64 extension,
+					    bool create)
+{
+	u64 __user *address = u64_to_user_ptr(extension);
+	struct drm_xe_ext_set_property ext;
+	int err;
+	u32 idx;
+
+	err = __copy_from_user(&ext, address, sizeof(ext));
+	if (XE_IOCTL_DBG(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_DBG(xe, ext.property >=
+			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
+	    XE_IOCTL_DBG(xe, ext.pad))
+		return -EINVAL;
+
+	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
+	return exec_queue_set_property_funcs[idx](xe, q, ext.value,  create);
+}
+
+typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
+					       struct xe_exec_queue *q,
+					       u64 extension,
+					       bool create);
+
+static const xe_exec_queue_set_property_fn exec_queue_user_extension_funcs[] = {
+	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
+};
+
+#define MAX_USER_EXTENSIONS	16
+static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
+				      u64 extensions, int ext_number, bool create)
+{
+	u64 __user *address = u64_to_user_ptr(extensions);
+	struct drm_xe_user_extension ext;
+	int err;
+	u32 idx;
+
+	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
+		return -E2BIG;
+
+	err = __copy_from_user(&ext, address, sizeof(ext));
+	if (XE_IOCTL_DBG(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_DBG(xe, ext.pad) ||
+	    XE_IOCTL_DBG(xe, ext.name >=
+			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
+		return -EINVAL;
+
+	idx = array_index_nospec(ext.name,
+				 ARRAY_SIZE(exec_queue_user_extension_funcs));
+	err = exec_queue_user_extension_funcs[idx](xe, q, extensions, create);
+	if (XE_IOCTL_DBG(xe, err))
+		return err;
+
+	if (ext.next_extension)
+		return exec_queue_user_extensions(xe, q, ext.next_extension,
+					      ++ext_number, create);
+
+	return 0;
+}
+
+static const enum xe_engine_class user_to_xe_engine_class[] = {
+	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
+static struct xe_hw_engine *
+find_hw_engine(struct xe_device *xe,
+	       struct drm_xe_engine_class_instance eci)
+{
+	u32 idx;
+
+	if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class))
+		return NULL;
+
+	if (eci.gt_id >= xe->info.gt_count)
+		return NULL;
+
+	idx = array_index_nospec(eci.engine_class,
+				 ARRAY_SIZE(user_to_xe_engine_class));
+
+	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
+			       user_to_xe_engine_class[idx],
+			       eci.engine_instance, true);
+}
+
+static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
+					struct drm_xe_engine_class_instance *eci,
+					u16 width, u16 num_placements)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 logical_mask = 0;
+
+	if (XE_IOCTL_DBG(xe, width != 1))
+		return 0;
+	if (XE_IOCTL_DBG(xe, num_placements != 1))
+		return 0;
+	if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
+		return 0;
+
+	eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY;
+
+	for_each_hw_engine(hwe, gt, id) {
+		if (xe_hw_engine_is_reserved(hwe))
+			continue;
+
+		if (hwe->class ==
+		    user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY])
+			logical_mask |= BIT(hwe->logical_instance);
+	}
+
+	return logical_mask;
+}
+
+static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
+				      struct drm_xe_engine_class_instance *eci,
+				      u16 width, u16 num_placements)
+{
+	int len = width * num_placements;
+	int i, j, n;
+	u16 class;
+	u16 gt_id;
+	u32 return_mask = 0, prev_mask;
+
+	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
+			 len > 1))
+		return 0;
+
+	for (i = 0; i < width; ++i) {
+		u32 current_mask = 0;
+
+		for (j = 0; j < num_placements; ++j) {
+			struct xe_hw_engine *hwe;
+
+			n = j * width + i;
+
+			hwe = find_hw_engine(xe, eci[n]);
+			if (XE_IOCTL_DBG(xe, !hwe))
+				return 0;
+
+			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
+				return 0;
+
+			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
+			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
+				return 0;
+
+			class = eci[n].engine_class;
+			gt_id = eci[n].gt_id;
+
+			if (width == 1 || !i)
+				return_mask |= BIT(eci[n].engine_instance);
+			current_mask |= BIT(eci[n].engine_instance);
+		}
+
+		/* Parallel submissions must be logically contiguous */
+		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
+			return 0;
+
+		prev_mask = current_mask;
+	}
+
+	return return_mask;
+}
+
+int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_exec_queue_create *args = data;
+	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
+	struct drm_xe_engine_class_instance __user *user_eci =
+		u64_to_user_ptr(args->instances);
+	struct xe_hw_engine *hwe;
+	struct xe_vm *vm, *migrate_vm;
+	struct xe_gt *gt;
+	struct xe_exec_queue *q = NULL;
+	u32 logical_mask;
+	u32 id;
+	u32 len;
+	int err;
+
+	if (XE_IOCTL_DBG(xe, args->flags) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	len = args->width * args->num_placements;
+	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
+		return -EINVAL;
+
+	err = __copy_from_user(eci, user_eci,
+			       sizeof(struct drm_xe_engine_class_instance) *
+			       len);
+	if (XE_IOCTL_DBG(xe, err))
+		return -EFAULT;
+
+	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
+		return -EINVAL;
+
+	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
+		for_each_gt(gt, xe, id) {
+			struct xe_exec_queue *new;
+
+			if (xe_gt_is_media_type(gt))
+				continue;
+
+			eci[0].gt_id = gt->info.id;
+			logical_mask = bind_exec_queue_logical_mask(xe, gt, eci,
+								    args->width,
+								    args->num_placements);
+			if (XE_IOCTL_DBG(xe, !logical_mask))
+				return -EINVAL;
+
+			hwe = find_hw_engine(xe, eci[0]);
+			if (XE_IOCTL_DBG(xe, !hwe))
+				return -EINVAL;
+
+			/* The migration vm doesn't hold rpm ref */
+			xe_device_mem_access_get(xe);
+
+			migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
+			new = xe_exec_queue_create(xe, migrate_vm, logical_mask,
+						   args->width, hwe,
+						   EXEC_QUEUE_FLAG_PERSISTENT |
+						   EXEC_QUEUE_FLAG_VM |
+						   (id ?
+						    EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
+						    0));
+
+			xe_device_mem_access_put(xe); /* now held by engine */
+
+			xe_vm_put(migrate_vm);
+			if (IS_ERR(new)) {
+				err = PTR_ERR(new);
+				if (q)
+					goto put_exec_queue;
+				return err;
+			}
+			if (id == 0)
+				q = new;
+			else
+				list_add_tail(&new->multi_gt_list,
+					      &q->multi_gt_link);
+		}
+	} else {
+		gt = xe_device_get_gt(xe, eci[0].gt_id);
+		logical_mask = calc_validate_logical_mask(xe, gt, eci,
+							  args->width,
+							  args->num_placements);
+		if (XE_IOCTL_DBG(xe, !logical_mask))
+			return -EINVAL;
+
+		hwe = find_hw_engine(xe, eci[0]);
+		if (XE_IOCTL_DBG(xe, !hwe))
+			return -EINVAL;
+
+		vm = xe_vm_lookup(xef, args->vm_id);
+		if (XE_IOCTL_DBG(xe, !vm))
+			return -ENOENT;
+
+		err = down_read_interruptible(&vm->lock);
+		if (err) {
+			xe_vm_put(vm);
+			return err;
+		}
+
+		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+			up_read(&vm->lock);
+			xe_vm_put(vm);
+			return -ENOENT;
+		}
+
+		q = xe_exec_queue_create(xe, vm, logical_mask,
+					 args->width, hwe,
+					 xe_vm_in_lr_mode(vm) ? 0 :
+					 EXEC_QUEUE_FLAG_PERSISTENT);
+		up_read(&vm->lock);
+		xe_vm_put(vm);
+		if (IS_ERR(q))
+			return PTR_ERR(q);
+
+		if (xe_vm_in_preempt_fence_mode(vm)) {
+			q->compute.context = dma_fence_context_alloc(1);
+			spin_lock_init(&q->compute.lock);
+
+			err = xe_vm_add_compute_exec_queue(vm, q);
+			if (XE_IOCTL_DBG(xe, err))
+				goto put_exec_queue;
+		}
+	}
+
+	if (args->extensions) {
+		err = exec_queue_user_extensions(xe, q, args->extensions, 0, true);
+		if (XE_IOCTL_DBG(xe, err))
+			goto kill_exec_queue;
+	}
+
+	q->persistent.xef = xef;
+
+	mutex_lock(&xef->exec_queue.lock);
+	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
+	mutex_unlock(&xef->exec_queue.lock);
+	if (err)
+		goto kill_exec_queue;
+
+	args->exec_queue_id = id;
+
+	return 0;
+
+kill_exec_queue:
+	xe_exec_queue_kill(q);
+put_exec_queue:
+	xe_exec_queue_put(q);
+	return err;
+}
+
+int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_exec_queue_get_property *args = data;
+	struct xe_exec_queue *q;
+	int ret;
+
+	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+	if (XE_IOCTL_DBG(xe, !q))
+		return -ENOENT;
+
+	switch (args->property) {
+	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
+		args->value = !!(q->flags & EXEC_QUEUE_FLAG_BANNED);
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	xe_exec_queue_put(q);
+
+	return ret;
+}
+
+/**
+ * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
+ * @q: The exec_queue
+ *
+ * Return: True if the exec_queue is long-running, false otherwise.
+ */
+bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
+{
+	return q->vm && xe_vm_in_lr_mode(q->vm) &&
+		!(q->flags & EXEC_QUEUE_FLAG_VM);
+}
+
+static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
+{
+	return q->lrc->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc) - 1;
+}
+
+/**
+ * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full
+ * @q: The exec_queue
+ *
+ * Return: True if the exec_queue's ring is full, false otherwise.
+ */
+bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
+{
+	struct xe_lrc *lrc = q->lrc;
+	s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES;
+
+	return xe_exec_queue_num_job_inflight(q) >= max_job;
+}
+
+/**
+ * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
+ * @q: The exec_queue
+ *
+ * FIXME: Need to determine what to use as the short-lived
+ * timeline lock for the exec_queues, so that the return value
+ * of this function becomes more than just an advisory
+ * snapshot in time. The timeline lock must protect the
+ * seqno from racing submissions on the same exec_queue.
+ * Typically vm->resv, but user-created timeline locks use the migrate vm
+ * and never grabs the migrate vm->resv so we have a race there.
+ *
+ * Return: True if the exec_queue is idle, false otherwise.
+ */
+bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
+{
+	if (xe_exec_queue_is_parallel(q)) {
+		int i;
+
+		for (i = 0; i < q->width; ++i) {
+			if (xe_lrc_seqno(&q->lrc[i]) !=
+			    q->lrc[i].fence_ctx.next_seqno - 1)
+				return false;
+		}
+
+		return true;
+	}
+
+	return xe_lrc_seqno(&q->lrc[0]) ==
+		q->lrc[0].fence_ctx.next_seqno - 1;
+}
+
+void xe_exec_queue_kill(struct xe_exec_queue *q)
+{
+	struct xe_exec_queue *eq = q, *next;
+
+	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
+				 multi_gt_link) {
+		q->ops->kill(eq);
+		xe_vm_remove_compute_exec_queue(q->vm, eq);
+	}
+
+	q->ops->kill(q);
+	xe_vm_remove_compute_exec_queue(q->vm, q);
+}
+
+int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_exec_queue_destroy *args = data;
+	struct xe_exec_queue *q;
+
+	if (XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	mutex_lock(&xef->exec_queue.lock);
+	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
+	mutex_unlock(&xef->exec_queue.lock);
+	if (XE_IOCTL_DBG(xe, !q))
+		return -ENOENT;
+
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERSISTENT))
+		xe_exec_queue_kill(q);
+	else
+		xe_device_add_persistent_exec_queues(xe, q);
+
+	trace_xe_exec_queue_close(q);
+	xe_exec_queue_put(q);
+
+	return 0;
+}
+
+static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
+						    struct xe_vm *vm)
+{
+	if (q->flags & EXEC_QUEUE_FLAG_VM)
+		lockdep_assert_held(&vm->lock);
+	else
+		xe_vm_assert_held(vm);
+}
+
+/**
+ * xe_exec_queue_last_fence_put() - Drop ref to last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ */
+void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	if (q->last_fence) {
+		dma_fence_put(q->last_fence);
+		q->last_fence = NULL;
+	}
+}
+
+/**
+ * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
+ * @q: The exec queue
+ *
+ * Only safe to be called from xe_exec_queue_destroy().
+ */
+void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
+{
+	if (q->last_fence) {
+		dma_fence_put(q->last_fence);
+		q->last_fence = NULL;
+	}
+}
+
+/**
+ * xe_exec_queue_last_fence_get() - Get last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Get last fence, does not take a ref
+ *
+ * Returns: last fence if not signaled, dma fence stub if signaled
+ */
+struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
+					       struct xe_vm *vm)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	if (q->last_fence &&
+	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
+		xe_exec_queue_last_fence_put(q, vm);
+
+	return q->last_fence ? q->last_fence : dma_fence_get_stub();
+}
+
+/**
+ * xe_exec_queue_last_fence_set() - Set last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ * @fence: The fence
+ *
+ * Set the last fence for the engine. Increases reference count for fence, when
+ * closing engine xe_exec_queue_last_fence_put should be called.
+ */
+void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
+				  struct dma_fence *fence)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	xe_exec_queue_last_fence_put(q, vm);
+	q->last_fence = dma_fence_get(fence);
+}
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
new file mode 100644
index 000000000000..d959cc4a1a82
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_EXEC_QUEUE_H_
+#define _XE_EXEC_QUEUE_H_
+
+#include "xe_exec_queue_types.h"
+#include "xe_vm_types.h"
+
+struct drm_device;
+struct drm_file;
+struct xe_device;
+struct xe_file;
+
+struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
+					   u32 logical_mask, u16 width,
+					   struct xe_hw_engine *hw_engine, u32 flags);
+struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
+						 struct xe_vm *vm,
+						 enum xe_engine_class class, u32 flags);
+
+void xe_exec_queue_fini(struct xe_exec_queue *q);
+void xe_exec_queue_destroy(struct kref *ref);
+void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance);
+
+struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id);
+
+static inline struct xe_exec_queue *xe_exec_queue_get(struct xe_exec_queue *q)
+{
+	kref_get(&q->refcount);
+	return q;
+}
+
+static inline void xe_exec_queue_put(struct xe_exec_queue *q)
+{
+	kref_put(&q->refcount, xe_exec_queue_destroy);
+}
+
+static inline bool xe_exec_queue_is_parallel(struct xe_exec_queue *q)
+{
+	return q->width > 1;
+}
+
+bool xe_exec_queue_is_lr(struct xe_exec_queue *q);
+
+bool xe_exec_queue_ring_full(struct xe_exec_queue *q);
+
+bool xe_exec_queue_is_idle(struct xe_exec_queue *q);
+
+void xe_exec_queue_kill(struct xe_exec_queue *q);
+
+int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file);
+int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file);
+enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe);
+
+void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm);
+void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e);
+struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e,
+					       struct xe_vm *vm);
+void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
+				  struct dma_fence *fence);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
new file mode 100644
index 000000000000..3d7e704ec3d9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -0,0 +1,222 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_EXEC_QUEUE_TYPES_H_
+#define _XE_EXEC_QUEUE_TYPES_H_
+
+#include <linux/kref.h>
+
+#include <drm/gpu_scheduler.h>
+
+#include "xe_gpu_scheduler_types.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence_types.h"
+#include "xe_lrc_types.h"
+
+struct xe_execlist_exec_queue;
+struct xe_gt;
+struct xe_guc_exec_queue;
+struct xe_hw_engine;
+struct xe_vm;
+
+enum xe_exec_queue_priority {
+	XE_EXEC_QUEUE_PRIORITY_UNSET = -2, /* For execlist usage only */
+	XE_EXEC_QUEUE_PRIORITY_LOW = 0,
+	XE_EXEC_QUEUE_PRIORITY_NORMAL,
+	XE_EXEC_QUEUE_PRIORITY_HIGH,
+	XE_EXEC_QUEUE_PRIORITY_KERNEL,
+
+	XE_EXEC_QUEUE_PRIORITY_COUNT
+};
+
+/**
+ * struct xe_exec_queue - Execution queue
+ *
+ * Contains all state necessary for submissions. Can either be a user object or
+ * a kernel object.
+ */
+struct xe_exec_queue {
+	/** @gt: graphics tile this exec queue can submit to */
+	struct xe_gt *gt;
+	/**
+	 * @hwe: A hardware of the same class. May (physical engine) or may not
+	 * (virtual engine) be where jobs actual engine up running. Should never
+	 * really be used for submissions.
+	 */
+	struct xe_hw_engine *hwe;
+	/** @refcount: ref count of this exec queue */
+	struct kref refcount;
+	/** @vm: VM (address space) for this exec queue */
+	struct xe_vm *vm;
+	/** @class: class of this exec queue */
+	enum xe_engine_class class;
+	/** @priority: priority of this exec queue */
+	enum xe_exec_queue_priority priority;
+	/**
+	 * @logical_mask: logical mask of where job submitted to exec queue can run
+	 */
+	u32 logical_mask;
+	/** @name: name of this exec queue */
+	char name[MAX_FENCE_NAME_LEN];
+	/** @width: width (number BB submitted per exec) of this exec queue */
+	u16 width;
+	/** @fence_irq: fence IRQ used to signal job completion */
+	struct xe_hw_fence_irq *fence_irq;
+
+	/**
+	 * @last_fence: last fence on exec queue, protected by vm->lock in write
+	 * mode if bind exec queue, protected by dma resv lock if non-bind exec
+	 * queue
+	 */
+	struct dma_fence *last_fence;
+
+/* queue no longer allowed to submit */
+#define EXEC_QUEUE_FLAG_BANNED			BIT(0)
+/* queue used for kernel submission only */
+#define EXEC_QUEUE_FLAG_KERNEL			BIT(1)
+/* kernel engine only destroyed at driver unload */
+#define EXEC_QUEUE_FLAG_PERMANENT		BIT(2)
+/* queue keeps running pending jobs after destroy ioctl */
+#define EXEC_QUEUE_FLAG_PERSISTENT		BIT(3)
+/* for VM jobs. Caller needs to hold rpm ref when creating queue with this flag */
+#define EXEC_QUEUE_FLAG_VM			BIT(4)
+/* child of VM queue for multi-tile VM jobs */
+#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(5)
+
+	/**
+	 * @flags: flags for this exec queue, should statically setup aside from ban
+	 * bit
+	 */
+	unsigned long flags;
+
+	union {
+		/** @multi_gt_list: list head for VM bind engines if multi-GT */
+		struct list_head multi_gt_list;
+		/** @multi_gt_link: link for VM bind engines if multi-GT */
+		struct list_head multi_gt_link;
+	};
+
+	union {
+		/** @execlist: execlist backend specific state for exec queue */
+		struct xe_execlist_exec_queue *execlist;
+		/** @guc: GuC backend specific state for exec queue */
+		struct xe_guc_exec_queue *guc;
+	};
+
+	/**
+	 * @persistent: persistent exec queue state
+	 */
+	struct {
+		/** @xef: file which this exec queue belongs to */
+		struct xe_file *xef;
+		/** @link: link in list of persistent exec queues */
+		struct list_head link;
+	} persistent;
+
+	union {
+		/**
+		 * @parallel: parallel submission state
+		 */
+		struct {
+			/** @composite_fence_ctx: context composite fence */
+			u64 composite_fence_ctx;
+			/** @composite_fence_seqno: seqno for composite fence */
+			u32 composite_fence_seqno;
+		} parallel;
+		/**
+		 * @bind: bind submission state
+		 */
+		struct {
+			/** @fence_ctx: context bind fence */
+			u64 fence_ctx;
+			/** @fence_seqno: seqno for bind fence */
+			u32 fence_seqno;
+		} bind;
+	};
+
+	/** @sched_props: scheduling properties */
+	struct {
+		/** @timeslice_us: timeslice period in micro-seconds */
+		u32 timeslice_us;
+		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		u32 preempt_timeout_us;
+	} sched_props;
+
+	/** @compute: compute exec queue state */
+	struct {
+		/** @pfence: preemption fence */
+		struct dma_fence *pfence;
+		/** @context: preemption fence context */
+		u64 context;
+		/** @seqno: preemption fence seqno */
+		u32 seqno;
+		/** @link: link into VM's list of exec queues */
+		struct list_head link;
+		/** @lock: preemption fences lock */
+		spinlock_t lock;
+	} compute;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/** @acc_trigger: access counter trigger */
+		u32 acc_trigger;
+		/** @acc_notify: access counter notify */
+		u32 acc_notify;
+		/** @acc_granularity: access counter granularity */
+		u32 acc_granularity;
+	} usm;
+
+	/** @ops: submission backend exec queue operations */
+	const struct xe_exec_queue_ops *ops;
+
+	/** @ring_ops: ring operations for this exec queue */
+	const struct xe_ring_ops *ring_ops;
+	/** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
+	struct drm_sched_entity *entity;
+	/** @lrc: logical ring context for this exec queue */
+	struct xe_lrc lrc[];
+};
+
+/**
+ * struct xe_exec_queue_ops - Submission backend exec queue operations
+ */
+struct xe_exec_queue_ops {
+	/** @init: Initialize exec queue for submission backend */
+	int (*init)(struct xe_exec_queue *q);
+	/** @kill: Kill inflight submissions for backend */
+	void (*kill)(struct xe_exec_queue *q);
+	/** @fini: Fini exec queue for submission backend */
+	void (*fini)(struct xe_exec_queue *q);
+	/** @set_priority: Set priority for exec queue */
+	int (*set_priority)(struct xe_exec_queue *q,
+			    enum xe_exec_queue_priority priority);
+	/** @set_timeslice: Set timeslice for exec queue */
+	int (*set_timeslice)(struct xe_exec_queue *q, u32 timeslice_us);
+	/** @set_preempt_timeout: Set preemption timeout for exec queue */
+	int (*set_preempt_timeout)(struct xe_exec_queue *q, u32 preempt_timeout_us);
+	/** @set_job_timeout: Set job timeout for exec queue */
+	int (*set_job_timeout)(struct xe_exec_queue *q, u32 job_timeout_ms);
+	/**
+	 * @suspend: Suspend exec queue from executing, allowed to be called
+	 * multiple times in a row before resume with the caveat that
+	 * suspend_wait returns before calling suspend again.
+	 */
+	int (*suspend)(struct xe_exec_queue *q);
+	/**
+	 * @suspend_wait: Wait for an exec queue to suspend executing, should be
+	 * call after suspend.
+	 */
+	void (*suspend_wait)(struct xe_exec_queue *q);
+	/**
+	 * @resume: Resume exec queue execution, exec queue must be in a suspended
+	 * state and dma fence returned from most recent suspend call must be
+	 * signalled when this function is called.
+	 */
+	void (*resume)(struct xe_exec_queue *q);
+	/** @reset_status: check exec queue reset status */
+	bool (*reset_status)(struct xe_exec_queue *q);
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
new file mode 100644
index 000000000000..96b5224eb478
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -0,0 +1,474 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_execlist.h"
+
+#include <drm/drm_managed.h>
+
+#include "instructions/xe_mi_commands.h"
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gpu_commands.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_lrc_layout.h"
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_gt.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_mocs.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+
+#define XE_EXECLIST_HANG_LIMIT 1
+
+#define SW_CTX_ID_SHIFT 37
+#define SW_CTX_ID_WIDTH 11
+#define XEHP_SW_CTX_ID_SHIFT  39
+#define XEHP_SW_CTX_ID_WIDTH  16
+
+#define SW_CTX_ID \
+	GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \
+		    SW_CTX_ID_SHIFT)
+
+#define XEHP_SW_CTX_ID \
+	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
+		    XEHP_SW_CTX_ID_SHIFT)
+
+
+static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
+			u32 ctx_id)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	u64 lrc_desc;
+
+	lrc_desc = xe_lrc_descriptor(lrc);
+
+	if (GRAPHICS_VERx100(xe) >= 1250) {
+		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
+		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
+	} else {
+		xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
+		lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
+	}
+
+	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
+		xe_mmio_write32(hwe->gt, RCU_MODE,
+				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
+
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+	lrc->ring.old_tail = lrc->ring.tail;
+
+	/*
+	 * Make sure the context image is complete before we submit it to HW.
+	 *
+	 * Ostensibly, writes (including the WCB) should be flushed prior to
+	 * an uncached write such as our mmio register access, the empirical
+	 * evidence (esp. on Braswell) suggests that the WC write into memory
+	 * may not be visible to the HW prior to the completion of the UC
+	 * register write and that we may begin execution from the context
+	 * before its image is complete leading to invalid PD chasing.
+	 */
+	wmb();
+
+	xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base),
+			xe_bo_ggtt_addr(hwe->hwsp));
+	xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base));
+	xe_mmio_write32(gt, RING_MODE(hwe->mmio_base),
+			_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
+
+	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
+			lower_32_bits(lrc_desc));
+	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
+			upper_32_bits(lrc_desc));
+	xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base),
+			EL_CTRL_LOAD);
+}
+
+static void __xe_execlist_port_start(struct xe_execlist_port *port,
+				     struct xe_execlist_exec_queue *exl)
+{
+	struct xe_device *xe = gt_to_xe(port->hwe->gt);
+	int max_ctx = FIELD_MAX(SW_CTX_ID);
+
+	if (GRAPHICS_VERx100(xe) >= 1250)
+		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
+
+	xe_execlist_port_assert_held(port);
+
+	if (port->running_exl != exl || !exl->has_run) {
+		port->last_ctx_id++;
+
+		/* 0 is reserved for the kernel context */
+		if (port->last_ctx_id > max_ctx)
+			port->last_ctx_id = 1;
+	}
+
+	__start_lrc(port->hwe, exl->q->lrc, port->last_ctx_id);
+	port->running_exl = exl;
+	exl->has_run = true;
+}
+
+static void __xe_execlist_port_idle(struct xe_execlist_port *port)
+{
+	u32 noop[2] = { MI_NOOP, MI_NOOP };
+
+	xe_execlist_port_assert_held(port);
+
+	if (!port->running_exl)
+		return;
+
+	xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop));
+	__start_lrc(port->hwe, &port->hwe->kernel_lrc, 0);
+	port->running_exl = NULL;
+}
+
+static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
+{
+	struct xe_lrc *lrc = exl->q->lrc;
+
+	return lrc->ring.tail == lrc->ring.old_tail;
+}
+
+static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
+{
+	struct xe_execlist_exec_queue *exl = NULL;
+	int i;
+
+	xe_execlist_port_assert_held(port);
+
+	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
+		while (!list_empty(&port->active[i])) {
+			exl = list_first_entry(&port->active[i],
+					       struct xe_execlist_exec_queue,
+					       active_link);
+			list_del(&exl->active_link);
+
+			if (xe_execlist_is_idle(exl)) {
+				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
+				continue;
+			}
+
+			list_add_tail(&exl->active_link, &port->active[i]);
+			__xe_execlist_port_start(port, exl);
+			return;
+		}
+	}
+
+	__xe_execlist_port_idle(port);
+}
+
+static u64 read_execlist_status(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	u32 hi, lo;
+
+	lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
+	hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
+
+	return lo | (u64)hi << 32;
+}
+
+static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
+{
+	u64 status;
+
+	xe_execlist_port_assert_held(port);
+
+	status = read_execlist_status(port->hwe);
+	if (status & BIT(7))
+		return;
+
+	__xe_execlist_port_start_next_active(port);
+}
+
+static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
+					 u16 intr_vec)
+{
+	struct xe_execlist_port *port = hwe->exl_port;
+
+	spin_lock(&port->lock);
+	xe_execlist_port_irq_handler_locked(port);
+	spin_unlock(&port->lock);
+}
+
+static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
+					 enum xe_exec_queue_priority priority)
+{
+	xe_execlist_port_assert_held(port);
+
+	if (port->running_exl && port->running_exl->active_priority >= priority)
+		return;
+
+	__xe_execlist_port_start_next_active(port);
+}
+
+static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
+{
+	struct xe_execlist_port *port = exl->port;
+	enum xe_exec_queue_priority priority = exl->active_priority;
+
+	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
+	XE_WARN_ON(priority < 0);
+	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));
+
+	spin_lock_irq(&port->lock);
+
+	if (exl->active_priority != priority &&
+	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
+		/* Priority changed, move it to the right list */
+		list_del(&exl->active_link);
+		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
+	}
+
+	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
+		exl->active_priority = priority;
+		list_add_tail(&exl->active_link, &port->active[priority]);
+	}
+
+	xe_execlist_port_wake_locked(exl->port, priority);
+
+	spin_unlock_irq(&port->lock);
+}
+
+static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
+{
+	struct xe_execlist_port *port =
+		container_of(timer, struct xe_execlist_port, irq_fail);
+
+	spin_lock_irq(&port->lock);
+	xe_execlist_port_irq_handler_locked(port);
+	spin_unlock_irq(&port->lock);
+
+	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
+	add_timer(&port->irq_fail);
+}
+
+struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
+						 struct xe_hw_engine *hwe)
+{
+	struct drm_device *drm = &xe->drm;
+	struct xe_execlist_port *port;
+	int i;
+
+	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return ERR_PTR(-ENOMEM);
+
+	port->hwe = hwe;
+
+	spin_lock_init(&port->lock);
+	for (i = 0; i < ARRAY_SIZE(port->active); i++)
+		INIT_LIST_HEAD(&port->active[i]);
+
+	port->last_ctx_id = 1;
+	port->running_exl = NULL;
+
+	hwe->irq_handler = xe_execlist_port_irq_handler;
+
+	/* TODO: Fix the interrupt code so it doesn't race like mad */
+	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
+	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
+	add_timer(&port->irq_fail);
+
+	return port;
+}
+
+void xe_execlist_port_destroy(struct xe_execlist_port *port)
+{
+	del_timer(&port->irq_fail);
+
+	/* Prevent an interrupt while we're destroying */
+	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
+	port->hwe->irq_handler = NULL;
+	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
+}
+
+static struct dma_fence *
+execlist_run_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+	struct xe_exec_queue *q = job->q;
+	struct xe_execlist_exec_queue *exl = job->q->execlist;
+
+	q->ring_ops->emit_job(job);
+	xe_execlist_make_active(exl);
+
+	return dma_fence_get(job->fence);
+}
+
+static void execlist_job_free(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+
+	xe_sched_job_put(job);
+}
+
+static const struct drm_sched_backend_ops drm_sched_ops = {
+	.run_job = execlist_run_job,
+	.free_job = execlist_job_free,
+};
+
+static int execlist_exec_queue_init(struct xe_exec_queue *q)
+{
+	struct drm_gpu_scheduler *sched;
+	struct xe_execlist_exec_queue *exl;
+	struct xe_device *xe = gt_to_xe(q->gt);
+	int err;
+
+	xe_assert(xe, !xe_device_uc_enabled(xe));
+
+	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
+
+	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
+	if (!exl)
+		return -ENOMEM;
+
+	exl->q = q;
+
+	err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
+			     q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
+			     XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
+			     NULL, NULL, q->hwe->name,
+			     gt_to_xe(q->gt)->drm.dev);
+	if (err)
+		goto err_free;
+
+	sched = &exl->sched;
+	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
+	if (err)
+		goto err_sched;
+
+	exl->port = q->hwe->exl_port;
+	exl->has_run = false;
+	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
+	q->execlist = exl;
+	q->entity = &exl->entity;
+
+	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);
+
+	return 0;
+
+err_sched:
+	drm_sched_fini(&exl->sched);
+err_free:
+	kfree(exl);
+	return err;
+}
+
+static void execlist_exec_queue_fini_async(struct work_struct *w)
+{
+	struct xe_execlist_exec_queue *ee =
+		container_of(w, struct xe_execlist_exec_queue, fini_async);
+	struct xe_exec_queue *q = ee->q;
+	struct xe_execlist_exec_queue *exl = q->execlist;
+	struct xe_device *xe = gt_to_xe(q->gt);
+	unsigned long flags;
+
+	xe_assert(xe, !xe_device_uc_enabled(xe));
+
+	spin_lock_irqsave(&exl->port->lock, flags);
+	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
+		list_del(&exl->active_link);
+	spin_unlock_irqrestore(&exl->port->lock, flags);
+
+	if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT)
+		xe_device_remove_persistent_exec_queues(xe, q);
+	drm_sched_entity_fini(&exl->entity);
+	drm_sched_fini(&exl->sched);
+	kfree(exl);
+
+	xe_exec_queue_fini(q);
+}
+
+static void execlist_exec_queue_kill(struct xe_exec_queue *q)
+{
+	/* NIY */
+}
+
+static void execlist_exec_queue_fini(struct xe_exec_queue *q)
+{
+	INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
+	queue_work(system_unbound_wq, &q->execlist->fini_async);
+}
+
+static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
+					    enum xe_exec_queue_priority priority)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
+						   u32 preempt_timeout_us)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_exec_queue_set_job_timeout(struct xe_exec_queue *q,
+					       u32 job_timeout_ms)
+{
+	/* NIY */
+	return 0;
+}
+
+static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
+{
+	/* NIY */
+	return 0;
+}
+
+static void execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
+
+{
+	/* NIY */
+}
+
+static void execlist_exec_queue_resume(struct xe_exec_queue *q)
+{
+	/* NIY */
+}
+
+static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
+{
+	/* NIY */
+	return false;
+}
+
+static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
+	.init = execlist_exec_queue_init,
+	.kill = execlist_exec_queue_kill,
+	.fini = execlist_exec_queue_fini,
+	.set_priority = execlist_exec_queue_set_priority,
+	.set_timeslice = execlist_exec_queue_set_timeslice,
+	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
+	.set_job_timeout = execlist_exec_queue_set_job_timeout,
+	.suspend = execlist_exec_queue_suspend,
+	.suspend_wait = execlist_exec_queue_suspend_wait,
+	.resume = execlist_exec_queue_resume,
+	.reset_status = execlist_exec_queue_reset_status,
+};
+
+int xe_execlist_init(struct xe_gt *gt)
+{
+	/* GuC submission enabled, nothing to do */
+	if (xe_device_uc_enabled(gt_to_xe(gt)))
+		return 0;
+
+	gt->exec_queue_ops = &execlist_exec_queue_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_execlist.h b/drivers/gpu/drm/xe/xe_execlist.h
new file mode 100644
index 000000000000..26f600ac8552
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_EXECLIST_H_
+#define _XE_EXECLIST_H_
+
+#include "xe_execlist_types.h"
+
+struct xe_device;
+struct xe_gt;
+
+#define xe_execlist_port_assert_held(port) lockdep_assert_held(&(port)->lock)
+
+int xe_execlist_init(struct xe_gt *gt);
+struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
+						 struct xe_hw_engine *hwe);
+void xe_execlist_port_destroy(struct xe_execlist_port *port);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h
new file mode 100644
index 000000000000..f94bbf4c53e4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist_types.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_EXECLIST_TYPES_H_
+#define _XE_EXECLIST_TYPES_H_
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "xe_exec_queue_types.h"
+
+struct xe_hw_engine;
+struct xe_execlist_exec_queue;
+
+struct xe_execlist_port {
+	struct xe_hw_engine *hwe;
+
+	spinlock_t lock;
+
+	struct list_head active[XE_EXEC_QUEUE_PRIORITY_COUNT];
+
+	u32 last_ctx_id;
+
+	struct xe_execlist_exec_queue *running_exl;
+
+	struct timer_list irq_fail;
+};
+
+struct xe_execlist_exec_queue {
+	struct xe_exec_queue *q;
+
+	struct drm_gpu_scheduler sched;
+
+	struct drm_sched_entity entity;
+
+	struct xe_execlist_port *port;
+
+	bool has_run;
+
+	struct work_struct fini_async;
+
+	enum xe_exec_queue_priority active_priority;
+	struct list_head active_link;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
new file mode 100644
index 000000000000..9bbe8a5040da
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_force_wake.h"
+
+#include <drm/drm_util.h>
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_reg_defs.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+
+#define XE_FORCE_WAKE_ACK_TIMEOUT_MS	50
+
+static struct xe_gt *
+fw_to_gt(struct xe_force_wake *fw)
+{
+	return fw->gt;
+}
+
+static struct xe_device *
+fw_to_xe(struct xe_force_wake *fw)
+{
+	return gt_to_xe(fw_to_gt(fw));
+}
+
+static void domain_init(struct xe_force_wake_domain *domain,
+			enum xe_force_wake_domain_id id,
+			struct xe_reg reg, struct xe_reg ack, u32 val, u32 mask)
+{
+	domain->id = id;
+	domain->reg_ctl = reg;
+	domain->reg_ack = ack;
+	domain->val = val;
+	domain->mask = mask;
+}
+
+void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	fw->gt = gt;
+	spin_lock_init(&fw->lock);
+
+	/* Assuming gen11+ so assert this assumption is correct */
+	xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
+
+	if (xe->info.graphics_verx100 >= 1270) {
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
+			    XE_FW_DOMAIN_ID_GT,
+			    FORCEWAKE_GT,
+			    FORCEWAKE_ACK_GT_MTL,
+			    BIT(0), BIT(16));
+	} else {
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
+			    XE_FW_DOMAIN_ID_GT,
+			    FORCEWAKE_GT,
+			    FORCEWAKE_ACK_GT,
+			    BIT(0), BIT(16));
+	}
+}
+
+void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
+{
+	int i, j;
+
+	/* Assuming gen11+ so assert this assumption is correct */
+	xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
+
+	if (!xe_gt_is_media_type(gt))
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER],
+			    XE_FW_DOMAIN_ID_RENDER,
+			    FORCEWAKE_RENDER,
+			    FORCEWAKE_ACK_RENDER,
+			    BIT(0), BIT(16));
+
+	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j],
+			    XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j,
+			    FORCEWAKE_MEDIA_VDBOX(j),
+			    FORCEWAKE_ACK_MEDIA_VDBOX(j),
+			    BIT(0), BIT(16));
+	}
+
+	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j],
+			    XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j,
+			    FORCEWAKE_MEDIA_VEBOX(j),
+			    FORCEWAKE_ACK_MEDIA_VEBOX(j),
+			    BIT(0), BIT(16));
+	}
+
+	if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))
+		domain_init(&fw->domains[XE_FW_DOMAIN_ID_GSC],
+			    XE_FW_DOMAIN_ID_GSC,
+			    FORCEWAKE_GSC,
+			    FORCEWAKE_ACK_GSC,
+			    BIT(0), BIT(16));
+}
+
+static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain)
+{
+	xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val);
+}
+
+static int domain_wake_wait(struct xe_gt *gt,
+			    struct xe_force_wake_domain *domain)
+{
+	return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val,
+			      XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
+			      NULL, true);
+}
+
+static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain)
+{
+	xe_mmio_write32(gt, domain->reg_ctl, domain->mask);
+}
+
+static int domain_sleep_wait(struct xe_gt *gt,
+			     struct xe_force_wake_domain *domain)
+{
+	return xe_mmio_wait32(gt, domain->reg_ack, domain->val, 0,
+			      XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
+			      NULL, true);
+}
+
+#define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \
+	for (tmp__ = (mask__); tmp__; tmp__ &= ~BIT(ffs(tmp__) - 1)) \
+		for_each_if((domain__ = ((fw__)->domains + \
+					 (ffs(tmp__) - 1))) && \
+					 domain__->reg_ctl.addr)
+
+int xe_force_wake_get(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains)
+{
+	struct xe_device *xe = fw_to_xe(fw);
+	struct xe_gt *gt = fw_to_gt(fw);
+	struct xe_force_wake_domain *domain;
+	enum xe_force_wake_domains tmp, woken = 0;
+	unsigned long flags;
+	int ret, ret2 = 0;
+
+	spin_lock_irqsave(&fw->lock, flags);
+	for_each_fw_domain_masked(domain, domains, fw, tmp) {
+		if (!domain->ref++) {
+			woken |= BIT(domain->id);
+			domain_wake(gt, domain);
+		}
+	}
+	for_each_fw_domain_masked(domain, woken, fw, tmp) {
+		ret = domain_wake_wait(gt, domain);
+		ret2 |= ret;
+		if (ret)
+			drm_notice(&xe->drm, "Force wake domain (%d) failed to ack wake, ret=%d\n",
+				   domain->id, ret);
+	}
+	fw->awake_domains |= woken;
+	spin_unlock_irqrestore(&fw->lock, flags);
+
+	return ret2;
+}
+
+int xe_force_wake_put(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains)
+{
+	struct xe_device *xe = fw_to_xe(fw);
+	struct xe_gt *gt = fw_to_gt(fw);
+	struct xe_force_wake_domain *domain;
+	enum xe_force_wake_domains tmp, sleep = 0;
+	unsigned long flags;
+	int ret, ret2 = 0;
+
+	spin_lock_irqsave(&fw->lock, flags);
+	for_each_fw_domain_masked(domain, domains, fw, tmp) {
+		if (!--domain->ref) {
+			sleep |= BIT(domain->id);
+			domain_sleep(gt, domain);
+		}
+	}
+	for_each_fw_domain_masked(domain, sleep, fw, tmp) {
+		ret = domain_sleep_wait(gt, domain);
+		ret2 |= ret;
+		if (ret)
+			drm_notice(&xe->drm, "Force wake domain (%d) failed to ack sleep, ret=%d\n",
+				   domain->id, ret);
+	}
+	fw->awake_domains &= ~sleep;
+	spin_unlock_irqrestore(&fw->lock, flags);
+
+	return ret2;
+}
diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
new file mode 100644
index 000000000000..83cb157da7cc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_FORCE_WAKE_H_
+#define _XE_FORCE_WAKE_H_
+
+#include "xe_assert.h"
+#include "xe_force_wake_types.h"
+
+struct xe_gt;
+
+void xe_force_wake_init_gt(struct xe_gt *gt,
+			   struct xe_force_wake *fw);
+void xe_force_wake_init_engines(struct xe_gt *gt,
+				struct xe_force_wake *fw);
+int xe_force_wake_get(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains);
+int xe_force_wake_put(struct xe_force_wake *fw,
+		      enum xe_force_wake_domains domains);
+
+static inline int
+xe_force_wake_ref(struct xe_force_wake *fw,
+		  enum xe_force_wake_domains domain)
+{
+	xe_gt_assert(fw->gt, domain);
+	return fw->domains[ffs(domain) - 1].ref;
+}
+
+static inline void
+xe_force_wake_assert_held(struct xe_force_wake *fw,
+			  enum xe_force_wake_domains domain)
+{
+	xe_gt_assert(fw->gt, fw->awake_domains & domain);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h
new file mode 100644
index 000000000000..ed0edc2cdf9f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake_types.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_FORCE_WAKE_TYPES_H_
+#define _XE_FORCE_WAKE_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+#include "regs/xe_reg_defs.h"
+
+enum xe_force_wake_domain_id {
+	XE_FW_DOMAIN_ID_GT = 0,
+	XE_FW_DOMAIN_ID_RENDER,
+	XE_FW_DOMAIN_ID_MEDIA,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX0,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX1,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX2,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX3,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX4,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX5,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX6,
+	XE_FW_DOMAIN_ID_MEDIA_VDBOX7,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX0,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX1,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX2,
+	XE_FW_DOMAIN_ID_MEDIA_VEBOX3,
+	XE_FW_DOMAIN_ID_GSC,
+	XE_FW_DOMAIN_ID_COUNT
+};
+
+enum xe_force_wake_domains {
+	XE_FW_GT		= BIT(XE_FW_DOMAIN_ID_GT),
+	XE_FW_RENDER		= BIT(XE_FW_DOMAIN_ID_RENDER),
+	XE_FW_MEDIA		= BIT(XE_FW_DOMAIN_ID_MEDIA),
+	XE_FW_MEDIA_VDBOX0	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX0),
+	XE_FW_MEDIA_VDBOX1	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX1),
+	XE_FW_MEDIA_VDBOX2	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX2),
+	XE_FW_MEDIA_VDBOX3	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX3),
+	XE_FW_MEDIA_VDBOX4	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX4),
+	XE_FW_MEDIA_VDBOX5	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX5),
+	XE_FW_MEDIA_VDBOX6	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX6),
+	XE_FW_MEDIA_VDBOX7	= BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX7),
+	XE_FW_MEDIA_VEBOX0	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX0),
+	XE_FW_MEDIA_VEBOX1	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX1),
+	XE_FW_MEDIA_VEBOX2	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2),
+	XE_FW_MEDIA_VEBOX3	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3),
+	XE_FW_GSC		= BIT(XE_FW_DOMAIN_ID_GSC),
+	XE_FORCEWAKE_ALL	= BIT(XE_FW_DOMAIN_ID_COUNT) - 1
+};
+
+/**
+ * struct xe_force_wake_domain - XE force wake domains
+ */
+struct xe_force_wake_domain {
+	/** @id: domain force wake id */
+	enum xe_force_wake_domain_id id;
+	/** @reg_ctl: domain wake control register address */
+	struct xe_reg reg_ctl;
+	/** @reg_ack: domain ack register address */
+	struct xe_reg reg_ack;
+	/** @val: domain wake write value */
+	u32 val;
+	/** @mask: domain mask */
+	u32 mask;
+	/** @ref: domain reference */
+	u32 ref;
+};
+
+/**
+ * struct xe_force_wake - XE force wake
+ */
+struct xe_force_wake {
+	/** @gt: back pointers to GT */
+	struct xe_gt *gt;
+	/** @lock: protects everything force wake struct */
+	spinlock_t lock;
+	/** @awake_domains: mask of all domains awake */
+	enum xe_force_wake_domains awake_domains;
+	/** @domains: force wake domains */
+	struct xe_force_wake_domain domains[XE_FW_DOMAIN_ID_COUNT];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
new file mode 100644
index 000000000000..106ee2b027f0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#define HEADER \
+	"// SPDX-License-Identifier: MIT\n" \
+	"\n" \
+	"/*\n" \
+	" * DO NOT MODIFY.\n" \
+	" *\n" \
+	" * This file was generated from rules: %s\n" \
+	" */\n" \
+	"#ifndef _GENERATED_XE_WA_OOB_\n" \
+	"#define _GENERATED_XE_WA_OOB_\n" \
+	"\n" \
+	"enum {\n"
+
+#define FOOTER \
+	"};\n" \
+	"\n" \
+	"#endif\n"
+
+static void print_usage(FILE *f)
+{
+	fprintf(f, "usage: %s <input-rule-file> <generated-c-source-file> <generated-c-header-file>\n",
+		program_invocation_short_name);
+}
+
+static void print_parse_error(const char *err_msg, const char *line,
+			      unsigned int lineno)
+{
+	fprintf(stderr, "ERROR: %s\nERROR: %u: %.60s\n",
+		err_msg, lineno, line);
+}
+
+static char *strip(char *line, size_t linelen)
+{
+	while (isspace(*(line + linelen)))
+		linelen--;
+
+	line[linelen - 1] = '\0';
+
+	return  line + strspn(line, " \f\n\r\t\v");
+}
+
+#define MAX_LINE_LEN 4096
+static int parse(FILE *input, FILE *csource, FILE *cheader)
+{
+	char line[MAX_LINE_LEN + 1];
+	char *name, *prev_name = NULL, *rules;
+	unsigned int lineno = 0, idx = 0;
+
+	while (fgets(line, sizeof(line), input)) {
+		size_t linelen;
+		bool is_continuation;
+
+		if (line[0] == '\0' || line[0] == '#' || line[0] == '\n') {
+			lineno++;
+			continue;
+		}
+
+		linelen = strlen(line);
+		if (linelen == MAX_LINE_LEN) {
+			print_parse_error("line too long", line, lineno);
+			return -EINVAL;
+		}
+
+		is_continuation = isspace(line[0]);
+		name = strip(line, linelen);
+
+		if (!is_continuation) {
+			name = strtok(name, " \t");
+			rules = strtok(NULL, "");
+		} else {
+			if (!prev_name) {
+				print_parse_error("invalid rule continuation",
+						  line, lineno);
+				return -EINVAL;
+			}
+
+			rules = name;
+			name = NULL;
+		}
+
+		if (rules[0] == '\0') {
+			print_parse_error("invalid empty rule\n", line, lineno);
+			return -EINVAL;
+		}
+
+		if (name) {
+			fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx);
+			fprintf(csource, "{ XE_RTP_NAME(\"%s\"), XE_RTP_RULES(%s) },\n",
+				name, rules);
+		} else {
+			fprintf(csource, "{ XE_RTP_NAME(NULL), XE_RTP_RULES(%s) },\n",
+				rules);
+		}
+
+		idx++;
+		lineno++;
+		if (!is_continuation)
+			prev_name = name;
+	}
+
+	fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx);
+
+	return 0;
+}
+
+int main(int argc, const char *argv[])
+{
+	enum {
+		ARGS_INPUT,
+		ARGS_CSOURCE,
+		ARGS_CHEADER,
+		_ARGS_COUNT
+	};
+	struct {
+		const char *fn;
+		const char *mode;
+		FILE *f;
+	} args[] = {
+		[ARGS_INPUT] = { .fn = argv[1], .mode = "r" },
+		[ARGS_CSOURCE] = { .fn = argv[2], .mode = "w" },
+		[ARGS_CHEADER] = { .fn = argv[3], .mode = "w" },
+	};
+	int ret = 1;
+
+	if (argc < 3) {
+		fprintf(stderr, "ERROR: wrong arguments\n");
+		print_usage(stderr);
+		return 1;
+	}
+
+	for (int i = 0; i < _ARGS_COUNT; i++) {
+		args[i].f = fopen(args[i].fn, args[i].mode);
+		if (!args[i].f) {
+			fprintf(stderr, "ERROR: Can't open %s: %m\n",
+				args[i].fn);
+			goto err;
+		}
+	}
+
+	fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn);
+	ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f,
+		    args[ARGS_CHEADER].f);
+	if (!ret)
+		fprintf(args[ARGS_CHEADER].f, FOOTER);
+
+err:
+	for (int i = 0; i < _ARGS_COUNT; i++) {
+		if (args[i].f)
+			fclose(args[i].f);
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
new file mode 100644
index 000000000000..3efd2d066bf7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_ggtt.h"
+
+#include <linux/sizes.h>
+
+#include <drm/drm_managed.h>
+#include <drm/i915_drm.h>
+
+#include "regs/xe_gt_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_wopcm.h"
+
+#define XELPG_GGTT_PTE_PAT0	BIT_ULL(52)
+#define XELPG_GGTT_PTE_PAT1	BIT_ULL(53)
+
+/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
+#define GUC_GGTT_TOP	0xFEE00000
+
+static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
+				   u16 pat_index)
+{
+	u64 pte;
+
+	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+	pte |= XE_PAGE_PRESENT;
+
+	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
+		pte |= XE_GGTT_PTE_DM;
+
+	return pte;
+}
+
+static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
+				    u16 pat_index)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	u64 pte;
+
+	pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, pat_index);
+
+	xe_assert(xe, pat_index <= 3);
+
+	if (pat_index & BIT(0))
+		pte |= XELPG_GGTT_PTE_PAT0;
+
+	if (pat_index & BIT(1))
+		pte |= XELPG_GGTT_PTE_PAT1;
+
+	return pte;
+}
+
+static unsigned int probe_gsm_size(struct pci_dev *pdev)
+{
+	u16 gmch_ctl, ggms;
+
+	pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl);
+	ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK;
+	return ggms ? SZ_1M << ggms : 0;
+}
+
+void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
+{
+	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
+	xe_tile_assert(ggtt->tile, addr < ggtt->size);
+
+	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
+}
+
+static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
+{
+	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
+	u64 end = start + size - 1;
+	u64 scratch_pte;
+
+	xe_tile_assert(ggtt->tile, start < end);
+
+	if (ggtt->scratch)
+		scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0,
+							  pat_index);
+	else
+		scratch_pte = 0;
+
+	while (start < end) {
+		xe_ggtt_set_pte(ggtt, start, scratch_pte);
+		start += XE_PAGE_SIZE;
+	}
+}
+
+static void ggtt_fini_early(struct drm_device *drm, void *arg)
+{
+	struct xe_ggtt *ggtt = arg;
+
+	mutex_destroy(&ggtt->lock);
+	drm_mm_takedown(&ggtt->mm);
+}
+
+static void ggtt_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_ggtt *ggtt = arg;
+
+	ggtt->scratch = NULL;
+}
+
+static void primelockdep(struct xe_ggtt *ggtt)
+{
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	fs_reclaim_acquire(GFP_KERNEL);
+	might_lock(&ggtt->lock);
+	fs_reclaim_release(GFP_KERNEL);
+}
+
+static const struct xe_ggtt_pt_ops xelp_pt_ops = {
+	.pte_encode_bo = xelp_ggtt_pte_encode_bo,
+};
+
+static const struct xe_ggtt_pt_ops xelpg_pt_ops = {
+	.pte_encode_bo = xelpg_ggtt_pte_encode_bo,
+};
+
+/*
+ * Early GGTT initialization, which allows to create new mappings usable by the
+ * GuC.
+ * Mappings are not usable by the HW engines, as it doesn't have scratch /
+ * initial clear done to it yet. That will happen in the regular, non-early
+ * GGTT init.
+ */
+int xe_ggtt_init_early(struct xe_ggtt *ggtt)
+{
+	struct xe_device *xe = tile_to_xe(ggtt->tile);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	unsigned int gsm_size;
+
+	gsm_size = probe_gsm_size(pdev);
+	if (gsm_size == 0) {
+		drm_err(&xe->drm, "Hardware reported no preallocated GSM\n");
+		return -ENOMEM;
+	}
+
+	ggtt->gsm = ggtt->tile->mmio.regs + SZ_8M;
+	ggtt->size = (gsm_size / 8) * (u64) XE_PAGE_SIZE;
+
+	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+		ggtt->flags |= XE_GGTT_FLAGS_64K;
+
+	/*
+	 * 8B per entry, each points to a 4KB page.
+	 *
+	 * The GuC address space is limited on both ends of the GGTT, because
+	 * the GuC shim HW redirects accesses to those addresses to other HW
+	 * areas instead of going through the GGTT. On the bottom end, the GuC
+	 * can't access offsets below the WOPCM size, while on the top side the
+	 * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of
+	 * checking each object to see if they are accessed by GuC or not, we
+	 * just exclude those areas from the allocator. Additionally, to
+	 * simplify the driver load, we use the maximum WOPCM size in this logic
+	 * instead of the programmed one, so we don't need to wait until the
+	 * actual size to be programmed is determined (which requires FW fetch)
+	 * before initializing the GGTT. These simplifications might waste space
+	 * in the GGTT (about 20-25 MBs depending on the platform) but we can
+	 * live with this.
+	 *
+	 * Another benifit of this is the GuC bootrom can't access anything
+	 * below the WOPCM max size so anything the bootom needs to access (e.g.
+	 * a RSA key) needs to be placed in the GGTT above the WOPCM max size.
+	 * Starting the GGTT allocations above the WOPCM max give us the correct
+	 * placement for free.
+	 */
+	if (ggtt->size > GUC_GGTT_TOP)
+		ggtt->size = GUC_GGTT_TOP;
+
+	if (GRAPHICS_VERx100(xe) >= 1270)
+		ggtt->pt_ops = &xelpg_pt_ops;
+	else
+		ggtt->pt_ops = &xelp_pt_ops;
+
+	drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
+		    ggtt->size - xe_wopcm_size(xe));
+	mutex_init(&ggtt->lock);
+	primelockdep(ggtt);
+
+	return drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt);
+}
+
+static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
+{
+	struct drm_mm_node *hole;
+	u64 start, end;
+
+	/* Display may have allocated inside ggtt, so be careful with clearing here */
+	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	mutex_lock(&ggtt->lock);
+	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
+		xe_ggtt_clear(ggtt, start, end - start);
+
+	xe_ggtt_invalidate(ggtt);
+	mutex_unlock(&ggtt->lock);
+	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+}
+
+int xe_ggtt_init(struct xe_ggtt *ggtt)
+{
+	struct xe_device *xe = tile_to_xe(ggtt->tile);
+	unsigned int flags;
+	int err;
+
+	/*
+	 * So we don't need to worry about 64K GGTT layout when dealing with
+	 * scratch entires, rather keep the scratch page in system memory on
+	 * platforms where 64K pages are needed for VRAM.
+	 */
+	flags = XE_BO_CREATE_PINNED_BIT;
+	if (ggtt->flags & XE_GGTT_FLAGS_64K)
+		flags |= XE_BO_CREATE_SYSTEM_BIT;
+	else
+		flags |= XE_BO_CREATE_VRAM_IF_DGFX(ggtt->tile);
+
+	ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags);
+	if (IS_ERR(ggtt->scratch)) {
+		err = PTR_ERR(ggtt->scratch);
+		goto err;
+	}
+
+	xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size);
+
+	xe_ggtt_initial_clear(ggtt);
+
+	return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt);
+err:
+	ggtt->scratch = NULL;
+	return err;
+}
+
+#define GUC_TLB_INV_CR				XE_REG(0xcee8)
+#define   GUC_TLB_INV_CR_INVALIDATE		REG_BIT(0)
+#define PVC_GUC_TLB_INV_DESC0			XE_REG(0xcf7c)
+#define   PVC_GUC_TLB_INV_DESC0_VALID		REG_BIT(0)
+#define PVC_GUC_TLB_INV_DESC1			XE_REG(0xcf80)
+#define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	REG_BIT(6)
+
+static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
+{
+	if (!gt)
+		return;
+
+	/*
+	 * Invalidation can happen when there's no in-flight work keeping the
+	 * GT awake.  We need to explicitly grab forcewake to ensure the GT
+	 * and GuC are accessible.
+	 */
+	xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+
+	/* TODO: vfunc for GuC vs. non-GuC */
+
+	if (gt->uc.guc.submission_state.enabled) {
+		int seqno;
+
+		seqno = xe_gt_tlb_invalidation_guc(gt);
+		xe_gt_assert(gt, seqno > 0);
+		if (seqno > 0)
+			xe_gt_tlb_invalidation_wait(gt, seqno);
+	} else if (xe_device_uc_enabled(gt_to_xe(gt))) {
+		struct xe_device *xe = gt_to_xe(gt);
+
+		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
+			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
+					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
+			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
+					PVC_GUC_TLB_INV_DESC0_VALID);
+		} else
+			xe_mmio_write32(gt, GUC_TLB_INV_CR,
+					GUC_TLB_INV_CR_INVALIDATE);
+	}
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
+{
+	/* Each GT in a tile has its own TLB to cache GGTT lookups */
+	ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
+	ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
+}
+
+void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
+{
+	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
+	u64 addr, scratch_pte;
+
+	scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index);
+
+	printk("%sGlobal GTT:", prefix);
+	for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) {
+		unsigned int i = addr / XE_PAGE_SIZE;
+
+		xe_tile_assert(ggtt->tile, addr <= U32_MAX);
+		if (ggtt->gsm[i] == scratch_pte)
+			continue;
+
+		printk("%s    ggtt[0x%08x] = 0x%016llx",
+		       prefix, (u32)addr, ggtt->gsm[i]);
+	}
+}
+
+int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+				       u32 size, u32 align, u32 mm_flags)
+{
+	return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0,
+					  mm_flags);
+}
+
+int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+				u32 size, u32 align)
+{
+	int ret;
+
+	mutex_lock(&ggtt->lock);
+	ret = xe_ggtt_insert_special_node_locked(ggtt, node, size,
+						 align, DRM_MM_INSERT_HIGH);
+	mutex_unlock(&ggtt->lock);
+
+	return ret;
+}
+
+void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
+	u64 start = bo->ggtt_node.start;
+	u64 offset, pte;
+
+	for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
+		pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
+		xe_ggtt_set_pte(ggtt, start + offset, pte);
+	}
+
+	xe_ggtt_invalidate(ggtt);
+}
+
+static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
+				  u64 start, u64 end)
+{
+	int err;
+	u64 alignment = XE_PAGE_SIZE;
+
+	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
+		alignment = SZ_64K;
+
+	if (XE_WARN_ON(bo->ggtt_node.size)) {
+		/* Someone's already inserted this BO in the GGTT */
+		xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
+		return 0;
+	}
+
+	err = xe_bo_validate(bo, NULL, false);
+	if (err)
+		return err;
+
+	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	mutex_lock(&ggtt->lock);
+	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
+					  alignment, 0, start, end, 0);
+	if (!err)
+		xe_ggtt_map_bo(ggtt, bo);
+	mutex_unlock(&ggtt->lock);
+	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+
+	return err;
+}
+
+int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
+			 u64 start, u64 end)
+{
+	return __xe_ggtt_insert_bo_at(ggtt, bo, start, end);
+}
+
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
+}
+
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
+{
+	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	mutex_lock(&ggtt->lock);
+
+	xe_ggtt_clear(ggtt, node->start, node->size);
+	drm_mm_remove_node(node);
+	node->size = 0;
+
+	xe_ggtt_invalidate(ggtt);
+
+	mutex_unlock(&ggtt->lock);
+	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+}
+
+void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+	if (XE_WARN_ON(!bo->ggtt_node.size))
+		return;
+
+	/* This BO is not currently in the GGTT */
+	xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
+
+	xe_ggtt_remove_node(ggtt, &bo->ggtt_node);
+}
+
+int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p)
+{
+	int err;
+
+	err = mutex_lock_interruptible(&ggtt->lock);
+	if (err)
+		return err;
+
+	drm_mm_print(&ggtt->mm, p);
+	mutex_unlock(&ggtt->lock);
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
new file mode 100644
index 000000000000..a09c166dff70
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_GGTT_H_
+#define _XE_GGTT_H_
+
+#include "xe_ggtt_types.h"
+
+struct drm_printer;
+
+void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte);
+void xe_ggtt_invalidate(struct xe_ggtt *ggtt);
+int xe_ggtt_init_early(struct xe_ggtt *ggtt);
+int xe_ggtt_init(struct xe_ggtt *ggtt);
+void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
+
+int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+				u32 size, u32 align);
+int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt,
+				       struct drm_mm_node *node,
+				       u32 size, u32 align, u32 mm_flags);
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node);
+void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
+			 u64 start, u64 end);
+void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+
+int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
new file mode 100644
index 000000000000..d8c584d9a8c3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GGTT_TYPES_H_
+#define _XE_GGTT_TYPES_H_
+
+#include <drm/drm_mm.h>
+
+#include "xe_pt_types.h"
+
+struct xe_bo;
+struct xe_gt;
+
+struct xe_ggtt_pt_ops {
+	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index);
+};
+
+struct xe_ggtt {
+	struct xe_tile *tile;
+
+	u64 size;
+
+#define XE_GGTT_FLAGS_64K BIT(0)
+	unsigned int flags;
+
+	struct xe_bo *scratch;
+
+	struct mutex lock;
+
+	u64 __iomem *gsm;
+
+	const struct xe_ggtt_pt_ops *pt_ops;
+
+	struct drm_mm mm;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
new file mode 100644
index 000000000000..e4ad1d6ce1d5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gpu_scheduler.h"
+
+static void xe_sched_process_msg_queue(struct xe_gpu_scheduler *sched)
+{
+	if (!READ_ONCE(sched->base.pause_submit))
+		queue_work(sched->base.submit_wq, &sched->work_process_msg);
+}
+
+static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched)
+{
+	struct xe_sched_msg *msg;
+
+	spin_lock(&sched->base.job_list_lock);
+	msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link);
+	if (msg)
+		xe_sched_process_msg_queue(sched);
+	spin_unlock(&sched->base.job_list_lock);
+}
+
+static struct xe_sched_msg *
+xe_sched_get_msg(struct xe_gpu_scheduler *sched)
+{
+	struct xe_sched_msg *msg;
+
+	spin_lock(&sched->base.job_list_lock);
+	msg = list_first_entry_or_null(&sched->msgs,
+				       struct xe_sched_msg, link);
+	if (msg)
+		list_del(&msg->link);
+	spin_unlock(&sched->base.job_list_lock);
+
+	return msg;
+}
+
+static void xe_sched_process_msg_work(struct work_struct *w)
+{
+	struct xe_gpu_scheduler *sched =
+		container_of(w, struct xe_gpu_scheduler, work_process_msg);
+	struct xe_sched_msg *msg;
+
+	if (READ_ONCE(sched->base.pause_submit))
+		return;
+
+	msg = xe_sched_get_msg(sched);
+	if (msg) {
+		sched->ops->process_msg(msg);
+
+		xe_sched_process_msg_queue_if_ready(sched);
+	}
+}
+
+int xe_sched_init(struct xe_gpu_scheduler *sched,
+		  const struct drm_sched_backend_ops *ops,
+		  const struct xe_sched_backend_ops *xe_ops,
+		  struct workqueue_struct *submit_wq,
+		  uint32_t hw_submission, unsigned hang_limit,
+		  long timeout, struct workqueue_struct *timeout_wq,
+		  atomic_t *score, const char *name,
+		  struct device *dev)
+{
+	sched->ops = xe_ops;
+	INIT_LIST_HEAD(&sched->msgs);
+	INIT_WORK(&sched->work_process_msg, xe_sched_process_msg_work);
+
+	return drm_sched_init(&sched->base, ops, submit_wq, 1, hw_submission,
+			      hang_limit, timeout, timeout_wq, score, name,
+			      dev);
+}
+
+void xe_sched_fini(struct xe_gpu_scheduler *sched)
+{
+	xe_sched_submission_stop(sched);
+	drm_sched_fini(&sched->base);
+}
+
+void xe_sched_submission_start(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_wqueue_start(&sched->base);
+	queue_work(sched->base.submit_wq, &sched->work_process_msg);
+}
+
+void xe_sched_submission_stop(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_wqueue_stop(&sched->base);
+	cancel_work_sync(&sched->work_process_msg);
+}
+
+void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
+		      struct xe_sched_msg *msg)
+{
+	spin_lock(&sched->base.job_list_lock);
+	list_add_tail(&msg->link, &sched->msgs);
+	spin_unlock(&sched->base.job_list_lock);
+
+	xe_sched_process_msg_queue(sched);
+}
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
new file mode 100644
index 000000000000..10c6bb9c9386
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GPU_SCHEDULER_H_
+#define _XE_GPU_SCHEDULER_H_
+
+#include "xe_gpu_scheduler_types.h"
+#include "xe_sched_job_types.h"
+
+int xe_sched_init(struct xe_gpu_scheduler *sched,
+		  const struct drm_sched_backend_ops *ops,
+		  const struct xe_sched_backend_ops *xe_ops,
+		  struct workqueue_struct *submit_wq,
+		  uint32_t hw_submission, unsigned hang_limit,
+		  long timeout, struct workqueue_struct *timeout_wq,
+		  atomic_t *score, const char *name,
+		  struct device *dev);
+void xe_sched_fini(struct xe_gpu_scheduler *sched);
+
+void xe_sched_submission_start(struct xe_gpu_scheduler *sched);
+void xe_sched_submission_stop(struct xe_gpu_scheduler *sched);
+
+void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
+		      struct xe_sched_msg *msg);
+
+static inline void xe_sched_stop(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_stop(&sched->base, NULL);
+}
+
+static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_tdr_queue_imm(&sched->base);
+}
+
+static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
+{
+	drm_sched_resubmit_jobs(&sched->base);
+}
+
+static inline bool
+xe_sched_invalidate_job(struct xe_sched_job *job, int threshold)
+{
+	return drm_sched_invalidate_job(&job->drm, threshold);
+}
+
+static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched,
+					    struct xe_sched_job *job)
+{
+	list_add(&job->drm.list, &sched->base.pending_list);
+}
+
+static inline
+struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched)
+{
+	return list_first_entry_or_null(&sched->base.pending_list,
+					struct xe_sched_job, drm.list);
+}
+
+static inline int
+xe_sched_entity_init(struct xe_sched_entity *entity,
+		     struct xe_gpu_scheduler *sched)
+{
+	return drm_sched_entity_init(entity, 0,
+				     (struct drm_gpu_scheduler **)&sched,
+				     1, NULL);
+}
+
+#define xe_sched_entity_fini drm_sched_entity_fini
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h
new file mode 100644
index 000000000000..6731b13da8bb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GPU_SCHEDULER_TYPES_H_
+#define _XE_GPU_SCHEDULER_TYPES_H_
+
+#include <drm/gpu_scheduler.h>
+
+/**
+ * struct xe_sched_msg - an in-band (relative to GPU scheduler run queue)
+ * message
+ *
+ * Generic enough for backend defined messages, backend can expand if needed.
+ */
+struct xe_sched_msg {
+	/** @link: list link into the gpu scheduler list of messages */
+	struct list_head		link;
+	/**
+	 * @private_data: opaque pointer to message private data (backend defined)
+	 */
+	void				*private_data;
+	/** @opcode: opcode of message (backend defined) */
+	unsigned int			opcode;
+};
+
+/**
+ * struct xe_sched_backend_ops - Define the backend operations called by the
+ * scheduler
+ */
+struct xe_sched_backend_ops {
+	/**
+	 * @process_msg: Process a message. Allowed to block, it is this
+	 * function's responsibility to free message if dynamically allocated.
+	 */
+	void (*process_msg)(struct xe_sched_msg *msg);
+};
+
+/**
+ * struct xe_gpu_scheduler - Xe GPU scheduler
+ */
+struct xe_gpu_scheduler {
+	/** @base: DRM GPU scheduler */
+	struct drm_gpu_scheduler		base;
+	/** @ops: Xe scheduler ops */
+	const struct xe_sched_backend_ops	*ops;
+	/** @msgs: list of messages to be processed in @work_process_msg */
+	struct list_head			msgs;
+	/** @work_process_msg: processes messages */
+	struct work_struct		work_process_msg;
+};
+
+#define xe_sched_entity		drm_sched_entity
+#define xe_sched_policy		drm_sched_policy
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
new file mode 100644
index 000000000000..a8a895cf4b44
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gsc.h"
+
+#include <drm/drm_managed.h>
+
+#include "abi/gsc_mkhi_commands_abi.h"
+#include "generated/xe_wa_oob.h"
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_gsc_submit.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_huc.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_sched_job.h"
+#include "xe_uc_fw.h"
+#include "xe_wa.h"
+#include "instructions/xe_gsc_commands.h"
+#include "regs/xe_gsc_regs.h"
+
+static struct xe_gt *
+gsc_to_gt(struct xe_gsc *gsc)
+{
+	return container_of(gsc, struct xe_gt, uc.gsc);
+}
+
+static int memcpy_fw(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 fw_size = gsc->fw.size;
+	void *storage;
+
+	/*
+	 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
+	 * a memcpy for now.
+	 */
+	storage = kmalloc(fw_size, GFP_KERNEL);
+	if (!storage)
+		return -ENOMEM;
+
+	xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
+	xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
+	xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size);
+
+	kfree(storage);
+
+	return 0;
+}
+
+static int emit_gsc_upload(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	u64 offset = xe_bo_ggtt_addr(gsc->private);
+	struct xe_bb *bb;
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	long timeout;
+
+	bb = xe_bb_new(gt, 4, false);
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	bb->cs[bb->len++] = GSC_FW_LOAD;
+	bb->cs[bb->len++] = lower_32_bits(offset);
+	bb->cs[bb->len++] = upper_32_bits(offset);
+	bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;
+
+	job = xe_bb_create_job(gsc->q, bb);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
+
+#define version_query_wr(xe_, map_, offset_, field_, val_) \
+	xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_)
+#define version_query_rd(xe_, map_, offset_, field_) \
+	xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_)
+
+static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset)
+{
+	xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in));
+
+	version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV);
+	version_query_wr(xe, map, wr_offset, header.command,
+			 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION);
+
+	return wr_offset + sizeof(struct gsc_get_compatibility_version_in);
+}
+
+#define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */
+static int query_compatibility_version(struct xe_gsc *gsc)
+{
+	struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *bo;
+	u32 wr_offset;
+	u32 rd_offset;
+	u64 ggtt_offset;
+	int err;
+
+	bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_SYSTEM_BIT |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo)) {
+		xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
+		return PTR_ERR(bo);
+	}
+
+	ggtt_offset = xe_bo_ggtt_addr(bo);
+
+	wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0,
+				       sizeof(struct gsc_get_compatibility_version_in));
+	wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset);
+
+	err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset,
+				       ggtt_offset + GSC_VER_PKT_SZ,
+				       GSC_VER_PKT_SZ);
+	if (err) {
+		xe_gt_err(gt,
+			  "failed to submit GSC request for compatibility version: %d\n",
+			  err);
+		goto out_bo;
+	}
+
+	err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ,
+				     sizeof(struct gsc_get_compatibility_version_out),
+				     &rd_offset);
+	if (err) {
+		xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
+		return err;
+	}
+
+	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major);
+	compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor);
+
+	xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor);
+
+out_bo:
+	xe_bo_unpin_map_no_vm(bo);
+	return err;
+}
+
+static int gsc_fw_is_loaded(struct xe_gt *gt)
+{
+	return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
+			      HECI1_FWSTS1_INIT_COMPLETE;
+}
+
+static int gsc_fw_wait(struct xe_gt *gt)
+{
+	/*
+	 * GSC load can take up to 250ms from the moment the instruction is
+	 * executed by the GSCCS. To account for possible submission delays or
+	 * other issues, we use a 500ms timeout in the wait here.
+	 */
+	return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
+			      HECI1_FWSTS1_INIT_COMPLETE,
+			      HECI1_FWSTS1_INIT_COMPLETE,
+			      500 * USEC_PER_MSEC, NULL, false);
+}
+
+static int gsc_upload(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	/* we should only be here if the init step were successful */
+	xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);
+
+	if (gsc_fw_is_loaded(gt)) {
+		xe_gt_err(gt, "GSC already loaded at upload time\n");
+		return -EEXIST;
+	}
+
+	err = memcpy_fw(gsc);
+	if (err) {
+		xe_gt_err(gt, "Failed to memcpy GSC FW\n");
+		return err;
+	}
+
+	/*
+	 * GSC is only killed by an FLR, so we need to trigger one on unload to
+	 * make sure we stop it. This is because we assign a chunk of memory to
+	 * the GSC as part of the FW load, so we need to make sure it stops
+	 * using it when we release it to the system on driver unload. Note that
+	 * this is not a problem of the unload per-se, because the GSC will not
+	 * touch that memory unless there are requests for it coming from the
+	 * driver; therefore, no accesses will happen while Xe is not loaded,
+	 * but if we re-load the driver then the GSC might wake up and try to
+	 * access that old memory location again.
+	 * Given that an FLR is a very disruptive action (see the FLR function
+	 * for details), we want to do it as the last action before releasing
+	 * the access to the MMIO bar, which means we need to do it as part of
+	 * mmio cleanup.
+	 */
+	xe->needs_flr_on_fini = true;
+
+	err = emit_gsc_upload(gsc);
+	if (err) {
+		xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
+		return err;
+	}
+
+	err = gsc_fw_wait(gt);
+	if (err) {
+		xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err));
+		return err;
+	}
+
+	err = query_compatibility_version(gsc);
+	if (err)
+		return err;
+
+	err = xe_uc_fw_check_version_requirements(&gsc->fw);
+	if (err)
+		return err;
+
+	xe_gt_dbg(gt, "GSC FW async load completed\n");
+
+	return 0;
+}
+
+static void gsc_work(struct work_struct *work)
+{
+	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_device *xe = gt_to_xe(gt);
+	int ret;
+
+	xe_device_mem_access_get(xe);
+	xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+
+	ret = gsc_upload(gsc);
+	if (ret && ret != -EEXIST) {
+		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+		goto out;
+	}
+
+	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
+
+	/* HuC auth failure is not fatal */
+	if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC))
+		xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);
+
+out:
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+	xe_device_mem_access_put(xe);
+}
+
+int xe_gsc_init(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	int ret;
+
+	gsc->fw.type = XE_UC_FW_TYPE_GSC;
+	INIT_WORK(&gsc->work, gsc_work);
+
+	/* The GSC uC is only available on the media GT */
+	if (tile->media_gt && (gt != tile->media_gt)) {
+		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
+		return 0;
+	}
+
+	/*
+	 * Some platforms can have GuC but not GSC. That would cause
+	 * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort
+	 * all firmware loading. So check for GSC being enabled before
+	 * propagating the failure back up. That way the higher level will keep
+	 * going and load GuC as appropriate.
+	 */
+	ret = xe_uc_fw_init(&gsc->fw);
+	if (!xe_uc_fw_is_enabled(&gsc->fw))
+		return 0;
+	else if (ret)
+		goto out;
+
+	return 0;
+
+out:
+	xe_gt_err(gt, "GSC init failed with %d", ret);
+	return ret;
+}
+
+static void free_resources(struct drm_device *drm, void *arg)
+{
+	struct xe_gsc *gsc = arg;
+
+	if (gsc->wq) {
+		destroy_workqueue(gsc->wq);
+		gsc->wq = NULL;
+	}
+
+	if (gsc->q) {
+		xe_exec_queue_put(gsc->q);
+		gsc->q = NULL;
+	}
+
+	if (gsc->private) {
+		xe_bo_unpin_map_no_vm(gsc->private);
+		gsc->private = NULL;
+	}
+}
+
+int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
+	struct xe_exec_queue *q;
+	struct workqueue_struct *wq;
+	struct xe_bo *bo;
+	int err;
+
+	if (!xe_uc_fw_is_available(&gsc->fw))
+		return 0;
+
+	if (!hwe)
+		return -ENODEV;
+
+	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_STOLEN_BIT |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	q = xe_exec_queue_create(xe, NULL,
+				 BIT(hwe->logical_instance), 1, hwe,
+				 EXEC_QUEUE_FLAG_KERNEL |
+				 EXEC_QUEUE_FLAG_PERMANENT);
+	if (IS_ERR(q)) {
+		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
+		err = PTR_ERR(q);
+		goto out_bo;
+	}
+
+	wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
+	if (!wq) {
+		err = -ENOMEM;
+		goto out_q;
+	}
+
+	gsc->private = bo;
+	gsc->q = q;
+	gsc->wq = wq;
+
+	err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
+	if (err)
+		return err;
+
+	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);
+
+	return 0;
+
+out_q:
+	xe_exec_queue_put(q);
+out_bo:
+	xe_bo_unpin_map_no_vm(bo);
+	return err;
+}
+
+void xe_gsc_load_start(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+
+	if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
+		return;
+
+	/* GSC FW survives GT reset and D3Hot */
+	if (gsc_fw_is_loaded(gt)) {
+		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
+		return;
+	}
+
+	queue_work(gsc->wq, &gsc->work);
+}
+
+void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
+{
+	if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
+		flush_work(&gsc->work);
+}
+
+/*
+ * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
+ * GSC engine reset by writing a notification bit in the GS1 register and then
+ * triggering an interrupt to GSC; from the interrupt it will take up to 200ms
+ * for the FW to get prepare for the reset, so we need to wait for that amount
+ * of time.
+ * After the reset is complete we need to then clear the GS1 register.
+ */
+void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
+{
+	u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0;
+	u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP;
+
+	/* WA only applies if the GSC is loaded */
+	if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
+		return;
+
+	xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);
+
+	if (prep) {
+		/* make sure the reset bit is clear when writing the CSR reg */
+		xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
+			      HECI_H_CSR_RST, HECI_H_CSR_IG);
+		msleep(200);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h
new file mode 100644
index 000000000000..bc1ef7f31ea2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_H_
+#define _XE_GSC_H_
+
+#include "xe_gsc_types.h"
+
+struct xe_gt;
+
+int xe_gsc_init(struct xe_gsc *gsc);
+int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc);
+void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc);
+void xe_gsc_load_start(struct xe_gsc *gsc);
+
+void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c
new file mode 100644
index 000000000000..8c5381e5913f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc_submit.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gsc_submit.h"
+
+#include "abi/gsc_command_header_abi.h"
+#include "xe_bb.h"
+#include "xe_exec_queue.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_types.h"
+#include "xe_map.h"
+#include "xe_sched_job.h"
+#include "instructions/xe_gsc_commands.h"
+#include "regs/xe_gsc_regs.h"
+
+#define GSC_HDR_SIZE (sizeof(struct intel_gsc_mtl_header)) /* shorthand define */
+
+#define mtl_gsc_header_wr(xe_, map_, offset_, field_, val_) \
+	xe_map_wr_field(xe_, map_, offset_, struct intel_gsc_mtl_header, field_, val_)
+
+#define mtl_gsc_header_rd(xe_, map_, offset_, field_) \
+	xe_map_rd_field(xe_, map_, offset_, struct intel_gsc_mtl_header, field_)
+
+/*
+ * GSC FW allows us to define the host_session_handle as we see fit, as long
+ * as we use unique identifier for each user, with handle 0 being reserved for
+ * kernel usage.
+ * To be able to differentiate which client subsystem owns the given session, we
+ * include the client id in the top 8 bits of the handle.
+ */
+#define HOST_SESSION_CLIENT_MASK GENMASK_ULL(63, 56)
+
+static struct xe_gt *
+gsc_to_gt(struct xe_gsc *gsc)
+{
+	return container_of(gsc, struct xe_gt, uc.gsc);
+}
+
+/**
+ * xe_gsc_emit_header - write the MTL GSC header in memory
+ * @xe: the Xe device
+ * @map: the iosys map to write to
+ * @offset: offset from the start of the map at which to write the header
+ * @heci_client_id: client id identifying the type of command (see abi for values)
+ * @host_session_id: host session ID of the caller
+ * @payload_size: size of the payload that follows the header
+ *
+ * Returns: offset memory location following the header
+ */
+u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset,
+		       u8 heci_client_id, u64 host_session_id, u32 payload_size)
+{
+	xe_assert(xe, !(host_session_id & HOST_SESSION_CLIENT_MASK));
+
+	if (host_session_id)
+		host_session_id |= FIELD_PREP(HOST_SESSION_CLIENT_MASK, heci_client_id);
+
+	xe_map_memset(xe, map, offset, 0, GSC_HDR_SIZE);
+
+	mtl_gsc_header_wr(xe, map, offset, validity_marker, GSC_HECI_VALIDITY_MARKER);
+	mtl_gsc_header_wr(xe, map, offset, heci_client_id, heci_client_id);
+	mtl_gsc_header_wr(xe, map, offset, host_session_handle, host_session_id);
+	mtl_gsc_header_wr(xe, map, offset, header_version, MTL_GSC_HEADER_VERSION);
+	mtl_gsc_header_wr(xe, map, offset, message_size, payload_size + GSC_HDR_SIZE);
+
+	return offset + GSC_HDR_SIZE;
+};
+
+/**
+ * xe_gsc_check_and_update_pending - check the pending bit and update the input
+ * header with the retry handle from the output header
+ * @xe: the Xe device
+ * @in: the iosys map containing the input buffer
+ * @offset_in: offset within the iosys at which the input buffer is located
+ * @out: the iosys map containing the output buffer
+ * @offset_out: offset within the iosys at which the output buffer is located
+ *
+ * Returns: true if the pending bit was set, false otherwise
+ */
+bool xe_gsc_check_and_update_pending(struct xe_device *xe,
+				     struct iosys_map *in, u32 offset_in,
+				     struct iosys_map *out, u32 offset_out)
+{
+	if (mtl_gsc_header_rd(xe, out, offset_out, flags) & GSC_OUTFLAG_MSG_PENDING) {
+		u64 handle = mtl_gsc_header_rd(xe, out, offset_out, gsc_message_handle);
+
+		mtl_gsc_header_wr(xe, in, offset_in, gsc_message_handle, handle);
+
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * xe_gsc_read_out_header - reads and validates the output header and returns
+ * the offset of the reply following the header
+ * @xe: the Xe device
+ * @map: the iosys map containing the output buffer
+ * @offset: offset within the iosys at which the output buffer is located
+ * @min_payload_size: minimum size of the message excluding the gsc header
+ * @payload_offset: optional pointer to be set to the payload offset
+ *
+ * Returns: -errno value on failure, 0 otherwise
+ */
+int xe_gsc_read_out_header(struct xe_device *xe,
+			   struct iosys_map *map, u32 offset,
+			   u32 min_payload_size,
+			   u32 *payload_offset)
+{
+	u32 marker = mtl_gsc_header_rd(xe, map, offset, validity_marker);
+	u32 size = mtl_gsc_header_rd(xe, map, offset, message_size);
+	u32 payload_size = size - GSC_HDR_SIZE;
+
+	if (marker != GSC_HECI_VALIDITY_MARKER)
+		return -EPROTO;
+
+	if (size < GSC_HDR_SIZE || payload_size < min_payload_size)
+		return -ENODATA;
+
+	if (payload_offset)
+		*payload_offset = offset + GSC_HDR_SIZE;
+
+	return 0;
+}
+
+/**
+ * xe_gsc_pkt_submit_kernel - submit a kernel heci pkt to the GSC
+ * @gsc: the GSC uC
+ * @addr_in: GGTT address of the message to send to the GSC
+ * @size_in: size of the message to send to the GSC
+ * @addr_out: GGTT address for the GSC to write the reply to
+ * @size_out: size of the memory reserved for the reply
+ */
+int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in,
+			     u64 addr_out, u32 size_out)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_bb *bb;
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	long timeout;
+
+	if (size_in < GSC_HDR_SIZE)
+		return -ENODATA;
+
+	if (size_out < GSC_HDR_SIZE)
+		return -ENOMEM;
+
+	bb = xe_bb_new(gt, 8, false);
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	bb->cs[bb->len++] = GSC_HECI_CMD_PKT;
+	bb->cs[bb->len++] = lower_32_bits(addr_in);
+	bb->cs[bb->len++] = upper_32_bits(addr_in);
+	bb->cs[bb->len++] = size_in;
+	bb->cs[bb->len++] = lower_32_bits(addr_out);
+	bb->cs[bb->len++] = upper_32_bits(addr_out);
+	bb->cs[bb->len++] = size_out;
+	bb->cs[bb->len++] = 0;
+
+	job = xe_bb_create_job(gsc->q, bb);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.h b/drivers/gpu/drm/xe/xe_gsc_submit.h
new file mode 100644
index 000000000000..0801da5d446a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc_submit.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_SUBMIT_H_
+#define _XE_GSC_SUBMIT_H_
+
+#include <linux/types.h>
+
+struct iosys_map;
+struct xe_device;
+struct xe_gsc;
+
+u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset,
+		       u8 heci_client_id, u64 host_session_id, u32 payload_size);
+
+bool xe_gsc_check_and_update_pending(struct xe_device *xe,
+				     struct iosys_map *in, u32 offset_in,
+				     struct iosys_map *out, u32 offset_out);
+
+int xe_gsc_read_out_header(struct xe_device *xe,
+			   struct iosys_map *map, u32 offset,
+			   u32 min_payload_size,
+			   u32 *payload_offset);
+
+int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in,
+			     u64 addr_out, u32 size_out);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h
new file mode 100644
index 000000000000..57fefd66a7ea
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc_types.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_TYPES_H_
+#define _XE_GSC_TYPES_H_
+
+#include <linux/workqueue.h>
+
+#include "xe_uc_fw_types.h"
+
+struct xe_bo;
+struct xe_exec_queue;
+
+/**
+ * struct xe_gsc - GSC
+ */
+struct xe_gsc {
+	/** @fw: Generic uC firmware management */
+	struct xe_uc_fw fw;
+
+	/** @security_version: SVN found in the fetched blob */
+	u32 security_version;
+
+	/** @private: Private data for use by the GSC FW */
+	struct xe_bo *private;
+
+	/** @q: Default queue used for submissions to GSC FW */
+	struct xe_exec_queue *q;
+
+	/** @wq: workqueue to handle jobs for delayed load and proxy handling */
+	struct workqueue_struct *wq;
+
+	/** @work: delayed load and proxy handling work */
+	struct work_struct work;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
new file mode 100644
index 000000000000..3af2adec1295
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -0,0 +1,778 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt.h"
+
+#include <linux/minmax.h>
+
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "instructions/xe_gfxpipe_commands.h"
+#include "instructions/xe_mi_commands.h"
+#include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_execlist.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gsc.h"
+#include "xe_gt_ccs_mode.h"
+#include "xe_gt_clock.h"
+#include "xe_gt_freq.h"
+#include "xe_gt_idle.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_sysfs.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_gt_topology.h"
+#include "xe_guc_exec_queue_types.h"
+#include "xe_guc_pc.h"
+#include "xe_hw_fence.h"
+#include "xe_hw_engine_class_sysfs.h"
+#include "xe_irq.h"
+#include "xe_lmtt.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_migrate.h"
+#include "xe_mmio.h"
+#include "xe_pat.h"
+#include "xe_mocs.h"
+#include "xe_reg_sr.h"
+#include "xe_ring_ops.h"
+#include "xe_sa.h"
+#include "xe_sched_job.h"
+#include "xe_sriov.h"
+#include "xe_tuning.h"
+#include "xe_uc.h"
+#include "xe_vm.h"
+#include "xe_wa.h"
+#include "xe_wopcm.h"
+
+struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
+{
+	struct xe_gt *gt;
+
+	gt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*gt), GFP_KERNEL);
+	if (!gt)
+		return ERR_PTR(-ENOMEM);
+
+	gt->tile = tile;
+	gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
+
+	return gt;
+}
+
+void xe_gt_sanitize(struct xe_gt *gt)
+{
+	/*
+	 * FIXME: if xe_uc_sanitize is called here, on TGL driver will not
+	 * reload
+	 */
+	gt->uc.guc.submission_state.enabled = false;
+}
+
+static void gt_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+	int i;
+
+	destroy_workqueue(gt->ordered_wq);
+
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+}
+
+static void gt_reset_worker(struct work_struct *w);
+
+static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
+{
+	struct xe_sched_job *job;
+	struct xe_bb *bb;
+	struct dma_fence *fence;
+	long timeout;
+
+	bb = xe_bb_new(gt, 4, false);
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	job = xe_bb_create_job(q, bb);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
+
+/*
+ * Convert back from encoded value to type-safe, only to be used when reg.mcr
+ * is true
+ */
+static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
+{
+	return (const struct xe_reg_mcr){.__reg.raw = reg.raw };
+}
+
+static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
+{
+	struct xe_reg_sr *sr = &q->hwe->reg_lrc;
+	struct xe_reg_sr_entry *entry;
+	unsigned long idx;
+	struct xe_sched_job *job;
+	struct xe_bb *bb;
+	struct dma_fence *fence;
+	long timeout;
+	int count = 0;
+
+	if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
+		/* Big enough to emit all of the context's 3DSTATE */
+		bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false);
+	else
+		/* Just pick a large BB size */
+		bb = xe_bb_new(gt, SZ_4K, false);
+
+	if (IS_ERR(bb))
+		return PTR_ERR(bb);
+
+	xa_for_each(&sr->xa, idx, entry)
+		++count;
+
+	if (count) {
+		xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
+
+		bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
+
+		xa_for_each(&sr->xa, idx, entry) {
+			struct xe_reg reg = entry->reg;
+			struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
+			u32 val;
+
+			/*
+			 * Skip reading the register if it's not really needed
+			 */
+			if (reg.masked)
+				val = entry->clr_bits << 16;
+			else if (entry->clr_bits + 1)
+				val = (reg.mcr ?
+				       xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
+				       xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+			else
+				val = 0;
+
+			val |= entry->set_bits;
+
+			bb->cs[bb->len++] = reg.addr;
+			bb->cs[bb->len++] = val;
+			xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
+		}
+	}
+
+	xe_lrc_emit_hwe_state_instructions(q, bb);
+
+	job = xe_bb_create_job(q, bb);
+	if (IS_ERR(job)) {
+		xe_bb_free(bb, NULL);
+		return PTR_ERR(job);
+	}
+
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	timeout = dma_fence_wait_timeout(fence, false, HZ);
+	dma_fence_put(fence);
+	xe_bb_free(bb, NULL);
+	if (timeout < 0)
+		return timeout;
+	else if (!timeout)
+		return -ETIME;
+
+	return 0;
+}
+
+int xe_gt_record_default_lrcs(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	int err = 0;
+
+	for_each_hw_engine(hwe, gt, id) {
+		struct xe_exec_queue *q, *nop_q;
+		void *default_lrc;
+
+		if (gt->default_lrc[hwe->class])
+			continue;
+
+		xe_reg_sr_init(&hwe->reg_lrc, hwe->name, xe);
+		xe_wa_process_lrc(hwe);
+		xe_hw_engine_setup_default_lrc_state(hwe);
+		xe_tuning_process_lrc(hwe);
+
+		default_lrc = drmm_kzalloc(&xe->drm,
+					   xe_lrc_size(xe, hwe->class),
+					   GFP_KERNEL);
+		if (!default_lrc)
+			return -ENOMEM;
+
+		q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1,
+					 hwe, EXEC_QUEUE_FLAG_KERNEL);
+		if (IS_ERR(q)) {
+			err = PTR_ERR(q);
+			xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n",
+				  hwe->name, q);
+			return err;
+		}
+
+		/* Prime golden LRC with known good state */
+		err = emit_wa_job(gt, q);
+		if (err) {
+			xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n",
+				  hwe->name, ERR_PTR(err), q->guc->id);
+			goto put_exec_queue;
+		}
+
+		nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance),
+					     1, hwe, EXEC_QUEUE_FLAG_KERNEL);
+		if (IS_ERR(nop_q)) {
+			err = PTR_ERR(nop_q);
+			xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n",
+				  hwe->name, nop_q);
+			goto put_exec_queue;
+		}
+
+		/* Switch to different LRC */
+		err = emit_nop_job(gt, nop_q);
+		if (err) {
+			xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n",
+				  hwe->name, ERR_PTR(err), nop_q->guc->id);
+			goto put_nop_q;
+		}
+
+		/* Reload golden LRC to record the effect of any indirect W/A */
+		err = emit_nop_job(gt, q);
+		if (err) {
+			xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n",
+				  hwe->name, ERR_PTR(err), q->guc->id);
+			goto put_nop_q;
+		}
+
+		xe_map_memcpy_from(xe, default_lrc,
+				   &q->lrc[0].bo->vmap,
+				   xe_lrc_pphwsp_offset(&q->lrc[0]),
+				   xe_lrc_size(xe, hwe->class));
+
+		gt->default_lrc[hwe->class] = default_lrc;
+put_nop_q:
+		xe_exec_queue_put(nop_q);
+put_exec_queue:
+		xe_exec_queue_put(q);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+int xe_gt_init_early(struct xe_gt *gt)
+{
+	int err;
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	xe_gt_topology_init(gt);
+	xe_gt_mcr_init(gt);
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt));
+
+	err = xe_wa_init(gt);
+	if (err)
+		return err;
+
+	xe_wa_process_gt(gt);
+	xe_wa_process_oob(gt);
+	xe_tuning_process_gt(gt);
+
+	return 0;
+}
+
+static void dump_pat_on_error(struct xe_gt *gt)
+{
+	struct drm_printer p;
+	char prefix[32];
+
+	snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id);
+	p = drm_debug_printer(prefix);
+
+	xe_pat_dump(gt, &p);
+}
+
+static int gt_fw_domain_init(struct xe_gt *gt)
+{
+	int err, i;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_hw_fence_irq;
+
+	xe_pat_init(gt);
+
+	if (!xe_gt_is_media_type(gt)) {
+		err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
+		if (err)
+			goto err_force_wake;
+		if (IS_SRIOV_PF(gt_to_xe(gt)))
+			xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
+	}
+
+	err = xe_uc_init(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	/* Raise GT freq to speed up HuC/GuC load */
+	xe_guc_pc_init_early(&gt->uc.guc.pc);
+
+	err = xe_uc_init_hwconfig(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	xe_gt_idle_sysfs_init(&gt->gtidle);
+
+	/* XXX: Fake that we pull the engine mask from hwconfig blob */
+	gt->info.engine_mask = gt->info.__engine_mask;
+
+	/* Enable per hw engine IRQs */
+	xe_irq_enable_hwe(gt);
+
+	/* Rerun MCR init as we now have hw engine list */
+	xe_gt_mcr_init(gt);
+
+	err = xe_hw_engines_init_early(gt);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_hw_engine_class_sysfs_init(gt);
+	if (err)
+		drm_warn(&gt_to_xe(gt)->drm,
+			 "failed to register engines sysfs directory, err: %d\n",
+			 err);
+
+	/* Initialize CCS mode sysfs after early initialization of HW engines */
+	xe_gt_ccs_mode_sysfs_init(gt);
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	XE_WARN_ON(err);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return 0;
+
+err_force_wake:
+	dump_pat_on_error(gt);
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_hw_fence_irq:
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return err;
+}
+
+static int all_fw_domain_init(struct xe_gt *gt)
+{
+	int err, i;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_hw_fence_irq;
+
+	xe_gt_mcr_set_implicit_defaults(gt);
+	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
+
+	err = xe_gt_clock_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	xe_mocs_init(gt);
+	err = xe_execlist_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_hw_engines_init(gt);
+	if (err)
+		goto err_force_wake;
+
+	err = xe_uc_init_post_hwconfig(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	if (!xe_gt_is_media_type(gt)) {
+		/*
+		 * USM has its only SA pool to non-block behind user operations
+		 */
+		if (gt_to_xe(gt)->info.has_usm) {
+			gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), SZ_1M, 16);
+			if (IS_ERR(gt->usm.bb_pool)) {
+				err = PTR_ERR(gt->usm.bb_pool);
+				goto err_force_wake;
+			}
+		}
+	}
+
+	if (!xe_gt_is_media_type(gt)) {
+		struct xe_tile *tile = gt_to_tile(gt);
+
+		tile->migrate = xe_migrate_init(tile);
+		if (IS_ERR(tile->migrate)) {
+			err = PTR_ERR(tile->migrate);
+			goto err_force_wake;
+		}
+	}
+
+	err = xe_uc_init_hw(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	/* Configure default CCS mode of 1 engine with all resources */
+	if (xe_gt_ccs_mode_enabled(gt)) {
+		gt->ccs_mode = 1;
+		xe_gt_apply_ccs_mode(gt);
+	}
+
+	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
+		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return 0;
+
+err_force_wake:
+	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+err_hw_fence_irq:
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return err;
+}
+
+int xe_gt_init(struct xe_gt *gt)
+{
+	int err;
+	int i;
+
+	INIT_WORK(&gt->reset.worker, gt_reset_worker);
+
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) {
+		gt->ring_ops[i] = xe_ring_ops_get(gt, i);
+		xe_hw_fence_irq_init(&gt->fence_irq[i]);
+	}
+
+	err = xe_gt_tlb_invalidation_init(gt);
+	if (err)
+		return err;
+
+	err = xe_gt_pagefault_init(gt);
+	if (err)
+		return err;
+
+	xe_mocs_init_early(gt);
+
+	xe_gt_sysfs_init(gt);
+
+	err = gt_fw_domain_init(gt);
+	if (err)
+		return err;
+
+	xe_gt_freq_init(gt);
+
+	xe_force_wake_init_engines(gt, gt_to_fw(gt));
+
+	err = all_fw_domain_init(gt);
+	if (err)
+		return err;
+
+	err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int do_gt_reset(struct xe_gt *gt)
+{
+	int err;
+
+	xe_gsc_wa_14015076503(gt, true);
+
+	xe_mmio_write32(gt, GDRST, GRDOM_FULL);
+	err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
+	if (err)
+		xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n",
+			  ERR_PTR(err));
+
+	xe_gsc_wa_14015076503(gt, false);
+
+	return err;
+}
+
+static int do_gt_restart(struct xe_gt *gt)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	int err;
+
+	xe_pat_init(gt);
+
+	xe_gt_mcr_set_implicit_defaults(gt);
+	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
+
+	err = xe_wopcm_init(&gt->uc.wopcm);
+	if (err)
+		return err;
+
+	for_each_hw_engine(hwe, gt, id)
+		xe_hw_engine_enable_ring(hwe);
+
+	err = xe_uc_sanitize_reset(&gt->uc);
+	if (err)
+		return err;
+
+	err = xe_uc_init_hw(&gt->uc);
+	if (err)
+		return err;
+
+	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
+		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
+
+	xe_mocs_init(gt);
+	err = xe_uc_start(&gt->uc);
+	if (err)
+		return err;
+
+	for_each_hw_engine(hwe, gt, id) {
+		xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
+		xe_reg_sr_apply_whitelist(hwe);
+	}
+
+	/* Get CCS mode in sync between sw/hw */
+	xe_gt_apply_ccs_mode(gt);
+
+	return 0;
+}
+
+static int gt_reset(struct xe_gt *gt)
+{
+	int err;
+
+	/* We only support GT resets with GuC submission */
+	if (!xe_device_uc_enabled(gt_to_xe(gt)))
+		return -ENODEV;
+
+	xe_gt_info(gt, "reset started\n");
+
+	if (xe_fault_inject_gt_reset()) {
+		err = -ECANCELED;
+		goto err_fail;
+	}
+
+	xe_gt_sanitize(gt);
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_msg;
+
+	xe_uc_gucrc_disable(&gt->uc);
+	xe_uc_stop_prepare(&gt->uc);
+	xe_gt_pagefault_reset(gt);
+
+	err = xe_uc_stop(&gt->uc);
+	if (err)
+		goto err_out;
+
+	err = do_gt_reset(gt);
+	if (err)
+		goto err_out;
+
+	xe_gt_tlb_invalidation_reset(gt);
+
+	err = do_gt_restart(gt);
+	if (err)
+		goto err_out;
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	xe_device_mem_access_put(gt_to_xe(gt));
+	XE_WARN_ON(err);
+
+	xe_gt_info(gt, "reset done\n");
+
+	return 0;
+
+err_out:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+	XE_WARN_ON(xe_uc_start(&gt->uc));
+	xe_device_mem_access_put(gt_to_xe(gt));
+err_fail:
+	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
+
+	gt_to_xe(gt)->needs_flr_on_fini = true;
+
+	return err;
+}
+
+static void gt_reset_worker(struct work_struct *w)
+{
+	struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);
+
+	gt_reset(gt);
+}
+
+void xe_gt_reset_async(struct xe_gt *gt)
+{
+	xe_gt_info(gt, "trying reset\n");
+
+	/* Don't do a reset while one is already in flight */
+	if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
+		return;
+
+	xe_gt_info(gt, "reset queued\n");
+	queue_work(gt->ordered_wq, &gt->reset.worker);
+}
+
+void xe_gt_suspend_prepare(struct xe_gt *gt)
+{
+	xe_device_mem_access_get(gt_to_xe(gt));
+	XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+	xe_uc_stop_prepare(&gt->uc);
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(gt_to_xe(gt));
+}
+
+int xe_gt_suspend(struct xe_gt *gt)
+{
+	int err;
+
+	xe_gt_sanitize(gt);
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_msg;
+
+	err = xe_uc_suspend(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(gt_to_xe(gt));
+	xe_gt_info(gt, "suspended\n");
+
+	return 0;
+
+err_force_wake:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+	xe_device_mem_access_put(gt_to_xe(gt));
+	xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
+
+	return err;
+}
+
+int xe_gt_resume(struct xe_gt *gt)
+{
+	int err;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_msg;
+
+	err = do_gt_restart(gt);
+	if (err)
+		goto err_force_wake;
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+	xe_device_mem_access_put(gt_to_xe(gt));
+	xe_gt_info(gt, "resumed\n");
+
+	return 0;
+
+err_force_wake:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+	xe_device_mem_access_put(gt_to_xe(gt));
+	xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));
+
+	return err;
+}
+
+struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
+				     enum xe_engine_class class,
+				     u16 instance, bool logical)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id)
+		if (hwe->class == class &&
+		    ((!logical && hwe->instance == instance) ||
+		    (logical && hwe->logical_instance == instance)))
+			return hwe;
+
+	return NULL;
+}
+
+struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt,
+							 enum xe_engine_class class)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id) {
+		switch (class) {
+		case XE_ENGINE_CLASS_RENDER:
+		case XE_ENGINE_CLASS_COMPUTE:
+			if (hwe->class == XE_ENGINE_CLASS_RENDER ||
+			    hwe->class == XE_ENGINE_CLASS_COMPUTE)
+				return hwe;
+			break;
+		default:
+			if (hwe->class == class)
+				return hwe;
+		}
+	}
+
+	return NULL;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
new file mode 100644
index 000000000000..4486e083f5ef
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_H_
+#define _XE_GT_H_
+
+#include <drm/drm_util.h>
+
+#include "xe_device_types.h"
+#include "xe_hw_engine.h"
+
+#define for_each_hw_engine(hwe__, gt__, id__) \
+	for ((id__) = 0; (id__) < ARRAY_SIZE((gt__)->hw_engines); (id__)++) \
+		for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
+			  xe_hw_engine_is_valid((hwe__)))
+
+#define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0)
+
+#ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h> /* XXX: fault-inject.h is broken */
+extern struct fault_attr gt_reset_failure;
+static inline bool xe_fault_inject_gt_reset(void)
+{
+	return should_fail(&gt_reset_failure, 1);
+}
+#else
+static inline bool xe_fault_inject_gt_reset(void)
+{
+	return false;
+}
+#endif
+
+struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
+int xe_gt_init_early(struct xe_gt *gt);
+int xe_gt_init(struct xe_gt *gt);
+int xe_gt_record_default_lrcs(struct xe_gt *gt);
+void xe_gt_suspend_prepare(struct xe_gt *gt);
+int xe_gt_suspend(struct xe_gt *gt);
+int xe_gt_resume(struct xe_gt *gt);
+void xe_gt_reset_async(struct xe_gt *gt);
+void xe_gt_sanitize(struct xe_gt *gt);
+
+/**
+ * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
+ * first that matches the same reset domain as @class
+ * @gt: GT structure
+ * @class: hw engine class to lookup
+ */
+struct xe_hw_engine *
+xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, enum xe_engine_class class);
+
+struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
+				     enum xe_engine_class class,
+				     u16 instance,
+				     bool logical);
+
+static inline bool xe_gt_is_media_type(struct xe_gt *gt)
+{
+	return gt->info.type == XE_GT_TYPE_MEDIA;
+}
+
+static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
+		hwe->instance == gt->usm.reserved_bcs_instance;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
new file mode 100644
index 000000000000..529fc286cd06
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
+#include "xe_gt.h"
+#include "xe_gt_ccs_mode.h"
+#include "xe_gt_sysfs.h"
+#include "xe_mmio.h"
+
+static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
+{
+	u32 mode = CCS_MODE_CSLICE_0_3_MASK; /* disable all by default */
+	int num_slices = hweight32(CCS_MASK(gt));
+	struct xe_device *xe = gt_to_xe(gt);
+	int width, cslice = 0;
+	u32 config = 0;
+
+	xe_assert(xe, xe_gt_ccs_mode_enabled(gt));
+
+	xe_assert(xe, num_engines && num_engines <= num_slices);
+	xe_assert(xe, !(num_slices % num_engines));
+
+	/*
+	 * Loop over all available slices and assign each a user engine.
+	 * For example, if there are four compute slices available, the
+	 * assignment of compute slices to compute engines would be,
+	 *
+	 * With 1 engine (ccs0):
+	 *   slice 0, 1, 2, 3: ccs0
+	 *
+	 * With 2 engines (ccs0, ccs1):
+	 *   slice 0, 2: ccs0
+	 *   slice 1, 3: ccs1
+	 *
+	 * With 4 engines (ccs0, ccs1, ccs2, ccs3):
+	 *   slice 0: ccs0
+	 *   slice 1: ccs1
+	 *   slice 2: ccs2
+	 *   slice 3: ccs3
+	 */
+	for (width = num_slices / num_engines; width; width--) {
+		struct xe_hw_engine *hwe;
+		enum xe_hw_engine_id id;
+
+		for_each_hw_engine(hwe, gt, id) {
+			if (hwe->class != XE_ENGINE_CLASS_COMPUTE)
+				continue;
+
+			if (hwe->logical_instance >= num_engines)
+				break;
+
+			config |= BIT(hwe->instance) << XE_HW_ENGINE_CCS0;
+
+			/* If a slice is fused off, leave disabled */
+			while ((CCS_MASK(gt) & BIT(cslice)) == 0)
+				cslice++;
+
+			mode &= ~CCS_MODE_CSLICE(cslice, CCS_MODE_CSLICE_MASK);
+			mode |= CCS_MODE_CSLICE(cslice, hwe->instance);
+			cslice++;
+		}
+	}
+
+	xe_mmio_write32(gt, CCS_MODE, mode);
+
+	xe_gt_info(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
+		   mode, config, num_engines, num_slices);
+}
+
+void xe_gt_apply_ccs_mode(struct xe_gt *gt)
+{
+	if (!gt->ccs_mode)
+		return;
+
+	__xe_gt_apply_ccs_mode(gt, gt->ccs_mode);
+}
+
+static ssize_t
+num_cslices_show(struct device *kdev,
+		 struct device_attribute *attr, char *buf)
+{
+	struct xe_gt *gt = kobj_to_gt(&kdev->kobj);
+
+	return sysfs_emit(buf, "%u\n", hweight32(CCS_MASK(gt)));
+}
+
+static DEVICE_ATTR_RO(num_cslices);
+
+static ssize_t
+ccs_mode_show(struct device *kdev,
+	      struct device_attribute *attr, char *buf)
+{
+	struct xe_gt *gt = kobj_to_gt(&kdev->kobj);
+
+	return sysfs_emit(buf, "%u\n", gt->ccs_mode);
+}
+
+static ssize_t
+ccs_mode_store(struct device *kdev, struct device_attribute *attr,
+	       const char *buff, size_t count)
+{
+	struct xe_gt *gt = kobj_to_gt(&kdev->kobj);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 num_engines, num_slices;
+	int ret;
+
+	ret = kstrtou32(buff, 0, &num_engines);
+	if (ret)
+		return ret;
+
+	/*
+	 * Ensure number of engines specified is valid and there is an
+	 * exact multiple of engines for slices.
+	 */
+	num_slices = hweight32(CCS_MASK(gt));
+	if (!num_engines || num_engines > num_slices || num_slices % num_engines) {
+		xe_gt_dbg(gt, "Invalid compute config, %d engines %d slices\n",
+			  num_engines, num_slices);
+		return -EINVAL;
+	}
+
+	/* CCS mode can only be updated when there are no drm clients */
+	spin_lock(&xe->clients.lock);
+	if (xe->clients.count) {
+		spin_unlock(&xe->clients.lock);
+		return -EBUSY;
+	}
+
+	if (gt->ccs_mode != num_engines) {
+		xe_gt_info(gt, "Setting compute mode to %d\n", num_engines);
+		gt->ccs_mode = num_engines;
+		xe_gt_reset_async(gt);
+	}
+
+	spin_unlock(&xe->clients.lock);
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(ccs_mode);
+
+static const struct attribute *gt_ccs_mode_attrs[] = {
+	&dev_attr_ccs_mode.attr,
+	&dev_attr_num_cslices.attr,
+	NULL,
+};
+
+static void xe_gt_ccs_mode_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs);
+}
+
+/**
+ * xe_gt_ccs_mode_sysfs_init - Initialize CCS mode sysfs interfaces
+ * @gt: GT structure
+ *
+ * Through a per-gt 'ccs_mode' sysfs interface, the user can enable a fixed
+ * number of compute hardware engines to which the available compute slices
+ * are to be allocated. This user configuration change triggers a gt reset
+ * and it is expected that there are no open drm clients while doing so.
+ * The number of available compute slices is exposed to user through a per-gt
+ * 'num_cslices' sysfs interface.
+ */
+void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	if (!xe_gt_ccs_mode_enabled(gt))
+		return;
+
+	err = sysfs_create_files(gt->sysfs, gt_ccs_mode_attrs);
+	if (err) {
+		drm_warn(&xe->drm, "Sysfs creation for ccs_mode failed err: %d\n", err);
+		return;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt);
+	if (err) {
+		sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs);
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
new file mode 100644
index 000000000000..f39975aaaab0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_CCS_MODE_H_
+#define _XE_GT_CCS_MODE_H_
+
+#include "xe_device_types.h"
+#include "xe_gt.h"
+#include "xe_gt_types.h"
+#include "xe_platform_types.h"
+
+void xe_gt_apply_ccs_mode(struct xe_gt *gt);
+void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt);
+
+static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt)
+{
+	/* Check if there are more than one compute engines available */
+	return hweight32(CCS_MASK(gt)) > 1;
+}
+
+#endif
+
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
new file mode 100644
index 000000000000..937054e31d72
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_clock.h"
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+
+static u32 read_reference_ts_freq(struct xe_gt *gt)
+{
+	u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE);
+	u32 base_freq, frac_freq;
+
+	base_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK,
+				  ts_override) + 1;
+	base_freq *= 1000000;
+
+	frac_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK,
+				  ts_override);
+	frac_freq = 1000000 / (frac_freq + 1);
+
+	return base_freq + frac_freq;
+}
+
+static u32 get_crystal_clock_freq(u32 rpm_config_reg)
+{
+	const u32 f19_2_mhz = 19200000;
+	const u32 f24_mhz = 24000000;
+	const u32 f25_mhz = 25000000;
+	const u32 f38_4_mhz = 38400000;
+	u32 crystal_clock = REG_FIELD_GET(RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK,
+					  rpm_config_reg);
+
+	switch (crystal_clock) {
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
+		return f24_mhz;
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
+		return f19_2_mhz;
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
+		return f38_4_mhz;
+	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
+		return f25_mhz;
+	default:
+		XE_WARN_ON("NOT_POSSIBLE");
+		return 0;
+	}
+}
+
+int xe_gt_clock_init(struct xe_gt *gt)
+{
+	u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE);
+	u32 freq = 0;
+
+	/* Assuming gen11+ so assert this assumption is correct */
+	xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
+
+	if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) {
+		freq = read_reference_ts_freq(gt);
+	} else {
+		u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0);
+
+		freq = get_crystal_clock_freq(c0);
+
+		/*
+		 * Now figure out how the command stream's timestamp
+		 * register increments from this frequency (it might
+		 * increment only every few clock cycle).
+		 */
+		freq >>= 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0);
+	}
+
+	gt->info.reference_clock = freq;
+	return 0;
+}
+
+u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count)
+{
+	return DIV_ROUND_CLOSEST_ULL(count * NSEC_PER_SEC, gt->info.reference_clock);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h
new file mode 100644
index 000000000000..aa162722f859
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_clock.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_CLOCK_H_
+#define _XE_GT_CLOCK_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+
+int xe_gt_clock_init(struct xe_gt *gt);
+u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
new file mode 100644
index 000000000000..c4b67cf09f8f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_debugfs.h"
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_pat.h"
+#include "xe_reg_sr.h"
+#include "xe_reg_whitelist.h"
+#include "xe_uc_debugfs.h"
+#include "xe_wa.h"
+
+static struct xe_gt *node_to_gt(struct drm_info_node *node)
+{
+	return node->info_ent->data;
+}
+
+static int hw_engines(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct drm_printer p = drm_seq_file_printer(m);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	int err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (err) {
+		xe_device_mem_access_put(xe);
+		return err;
+	}
+
+	for_each_hw_engine(hwe, gt, id)
+		xe_hw_engine_print(hwe, &p);
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	xe_device_mem_access_put(xe);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int force_reset(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+
+	xe_gt_reset_async(gt);
+
+	return 0;
+}
+
+static int sa_info(struct seq_file *m, void *data)
+{
+	struct xe_tile *tile = gt_to_tile(node_to_gt(m->private));
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, &p,
+				     tile->mem.kernel_bb_pool->gpu_addr);
+
+	return 0;
+}
+
+static int topology(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_gt_topology_dump(gt, &p);
+
+	return 0;
+}
+
+static int steering(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_gt_mcr_steering_dump(gt, &p);
+
+	return 0;
+}
+
+static int ggtt(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	return xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, &p);
+}
+
+static int register_save_restore(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	xe_reg_sr_dump(&gt->reg_sr, &p);
+	drm_printf(&p, "\n");
+
+	drm_printf(&p, "Engine\n");
+	for_each_hw_engine(hwe, gt, id)
+		xe_reg_sr_dump(&hwe->reg_sr, &p);
+	drm_printf(&p, "\n");
+
+	drm_printf(&p, "LRC\n");
+	for_each_hw_engine(hwe, gt, id)
+		xe_reg_sr_dump(&hwe->reg_lrc, &p);
+	drm_printf(&p, "\n");
+
+	drm_printf(&p, "Whitelist\n");
+	for_each_hw_engine(hwe, gt, id)
+		xe_reg_whitelist_dump(&hwe->reg_whitelist, &p);
+
+	return 0;
+}
+
+static int workarounds(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_wa_dump(gt, &p);
+
+	return 0;
+}
+
+static int pat(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_pat_dump(gt, &p);
+
+	return 0;
+}
+
+static int rcs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_RENDER);
+	return 0;
+}
+
+static int ccs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COMPUTE);
+	return 0;
+}
+
+static int bcs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COPY);
+	return 0;
+}
+
+static int vcs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_DECODE);
+	return 0;
+}
+
+static int vecs_default_lrc(struct seq_file *m, void *data)
+{
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_ENHANCE);
+	return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+	{"hw_engines", hw_engines, 0},
+	{"force_reset", force_reset, 0},
+	{"sa_info", sa_info, 0},
+	{"topology", topology, 0},
+	{"steering", steering, 0},
+	{"ggtt", ggtt, 0},
+	{"register-save-restore", register_save_restore, 0},
+	{"workarounds", workarounds, 0},
+	{"pat", pat, 0},
+	{"default_lrc_rcs", rcs_default_lrc},
+	{"default_lrc_ccs", ccs_default_lrc},
+	{"default_lrc_bcs", bcs_default_lrc},
+	{"default_lrc_vcs", vcs_default_lrc},
+	{"default_lrc_vecs", vecs_default_lrc},
+};
+
+void xe_gt_debugfs_register(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct drm_minor *minor = gt_to_xe(gt)->drm.primary;
+	struct dentry *root;
+	struct drm_info_list *local;
+	char name[8];
+	int i;
+
+	xe_gt_assert(gt, minor->debugfs_root);
+
+	sprintf(name, "gt%d", gt->info.id);
+	root = debugfs_create_dir(name, minor->debugfs_root);
+	if (IS_ERR(root)) {
+		drm_warn(&xe->drm, "Create GT directory failed");
+		return;
+	}
+
+	/*
+	 * Allocate local copy as we need to pass in the GT to the debugfs
+	 * entry and drm_debugfs_create_files just references the drm_info_list
+	 * passed in (e.g. can't define this on the stack).
+	 */
+#define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
+	local = drmm_kmalloc(&xe->drm, DEBUGFS_SIZE, GFP_KERNEL);
+	if (!local)
+		return;
+
+	memcpy(local, debugfs_list, DEBUGFS_SIZE);
+#undef DEBUGFS_SIZE
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+		local[i].data = gt;
+
+	drm_debugfs_create_files(local,
+				 ARRAY_SIZE(debugfs_list),
+				 root, minor);
+
+	xe_uc_debugfs_register(&gt->uc, root);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h
new file mode 100644
index 000000000000..5a329f118a57
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_DEBUGFS_H_
+#define _XE_GT_DEBUGFS_H_
+
+struct xe_gt;
+
+void xe_gt_debugfs_register(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
new file mode 100644
index 000000000000..3adfa6686e7c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gt_freq.h"
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+#include "xe_gt_sysfs.h"
+#include "xe_gt_throttle_sysfs.h"
+#include "xe_guc_pc.h"
+
+/**
+ * DOC: Xe GT Frequency Management
+ *
+ * This component is responsible for the raw GT frequency management, including
+ * the sysfs API.
+ *
+ * Underneath, Xe enables GuC SLPC automated frequency management. GuC is then
+ * allowed to request PCODE any frequency between the Minimum and the Maximum
+ * selected by this component. Furthermore, it is important to highlight that
+ * PCODE is the ultimate decision maker of the actual running frequency, based
+ * on thermal and other running conditions.
+ *
+ * Xe's Freq provides a sysfs API for frequency management:
+ *
+ * device/tile#/gt#/freq0/<item>_freq *read-only* files:
+ * - act_freq: The actual resolved frequency decided by PCODE.
+ * - cur_freq: The current one requested by GuC PC to the PCODE.
+ * - rpn_freq: The Render Performance (RP) N level, which is the minimal one.
+ * - rpe_freq: The Render Performance (RP) E level, which is the efficient one.
+ * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one.
+ *
+ * device/tile#/gt#/freq0/<item>_freq *read-write* files:
+ * - min_freq: Min frequency request.
+ * - max_freq: Max frequency request.
+ *             If max <= min, then freq_min becomes a fixed frequency request.
+ */
+
+static struct xe_guc_pc *
+dev_to_pc(struct device *dev)
+{
+	return &kobj_to_gt(dev->kobj.parent)->uc.guc.pc;
+}
+
+static ssize_t act_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+
+	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_act_freq(pc));
+}
+static DEVICE_ATTR_RO(act_freq);
+
+static ssize_t cur_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+	ssize_t ret;
+
+	ret = xe_guc_pc_get_cur_freq(pc, &freq);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%d\n", freq);
+}
+static DEVICE_ATTR_RO(cur_freq);
+
+static ssize_t rp0_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+
+	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rp0_freq(pc));
+}
+static DEVICE_ATTR_RO(rp0_freq);
+
+static ssize_t rpe_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+
+	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpe_freq(pc));
+}
+static DEVICE_ATTR_RO(rpe_freq);
+
+static ssize_t rpn_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+
+	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpn_freq(pc));
+}
+static DEVICE_ATTR_RO(rpn_freq);
+
+static ssize_t min_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+	ssize_t ret;
+
+	ret = xe_guc_pc_get_min_freq(pc, &freq);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%d\n", freq);
+}
+
+static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
+			      const char *buff, size_t count)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+	ssize_t ret;
+
+	ret = kstrtou32(buff, 0, &freq);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_pc_set_min_freq(pc, freq);
+	if (ret)
+		return ret;
+
+	return count;
+}
+static DEVICE_ATTR_RW(min_freq);
+
+static ssize_t max_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+	ssize_t ret;
+
+	ret = xe_guc_pc_get_max_freq(pc, &freq);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%d\n", freq);
+}
+
+static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
+			      const char *buff, size_t count)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+	ssize_t ret;
+
+	ret = kstrtou32(buff, 0, &freq);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_pc_set_max_freq(pc, freq);
+	if (ret)
+		return ret;
+
+	return count;
+}
+static DEVICE_ATTR_RW(max_freq);
+
+static const struct attribute *freq_attrs[] = {
+	&dev_attr_act_freq.attr,
+	&dev_attr_cur_freq.attr,
+	&dev_attr_rp0_freq.attr,
+	&dev_attr_rpe_freq.attr,
+	&dev_attr_rpn_freq.attr,
+	&dev_attr_min_freq.attr,
+	&dev_attr_max_freq.attr,
+	NULL
+};
+
+static void freq_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg;
+
+	sysfs_remove_files(kobj, freq_attrs);
+	kobject_put(kobj);
+}
+
+/**
+ * xe_gt_freq_init - Initialize Xe Freq component
+ * @gt: Xe GT object
+ *
+ * It needs to be initialized after GT Sysfs and GuC PC components are ready.
+ */
+void xe_gt_freq_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	gt->freq = kobject_create_and_add("freq0", gt->sysfs);
+	if (!gt->freq) {
+		drm_warn(&xe->drm, "failed to add freq0 directory to %s\n",
+			 kobject_name(gt->sysfs));
+		return;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, freq_fini, gt->freq);
+	if (err) {
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+		return;
+	}
+
+	err = sysfs_create_files(gt->freq, freq_attrs);
+	if (err)
+		drm_warn(&xe->drm,  "failed to add freq attrs to %s, err: %d\n",
+			 kobject_name(gt->freq), err);
+
+	xe_gt_throttle_sysfs_init(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.h b/drivers/gpu/drm/xe/xe_gt_freq.h
new file mode 100644
index 000000000000..f3fe3c90491a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_freq.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_FREQ_H_
+#define _XE_GT_FREQ_H_
+
+struct xe_gt;
+
+void xe_gt_freq_init(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
new file mode 100644
index 000000000000..9358f7336889
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_idle.h"
+#include "xe_gt_sysfs.h"
+#include "xe_guc_pc.h"
+#include "regs/xe_gt_regs.h"
+#include "xe_mmio.h"
+
+/**
+ * DOC: Xe GT Idle
+ *
+ * Contains functions that init GT idle features like C6
+ *
+ * device/gt#/gtidle/name - name of the state
+ * device/gt#/gtidle/idle_residency_ms - Provides residency of the idle state in ms
+ * device/gt#/gtidle/idle_status - Provides current idle state
+ */
+
+static struct xe_gt_idle *dev_to_gtidle(struct device *dev)
+{
+	struct kobject *kobj = &dev->kobj;
+
+	return &kobj_to_gt(kobj->parent)->gtidle;
+}
+
+static struct xe_gt *gtidle_to_gt(struct xe_gt_idle *gtidle)
+{
+	return container_of(gtidle, struct xe_gt, gtidle);
+}
+
+static struct xe_guc_pc *gtidle_to_pc(struct xe_gt_idle *gtidle)
+{
+	return &gtidle_to_gt(gtidle)->uc.guc.pc;
+}
+
+static const char *gt_idle_state_to_string(enum xe_gt_idle_state state)
+{
+	switch (state) {
+	case GT_IDLE_C0:
+		return "gt-c0";
+	case GT_IDLE_C6:
+		return "gt-c6";
+	default:
+		return "unknown";
+	}
+}
+
+static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency)
+{
+	u64 delta, overflow_residency, prev_residency;
+
+	overflow_residency = BIT_ULL(32);
+
+	/*
+	 * Counter wrap handling
+	 * Store previous hw counter values for counter wrap-around handling
+	 * Relying on sufficient frequency of queries otherwise counters can still wrap.
+	 */
+	prev_residency = gtidle->prev_residency;
+	gtidle->prev_residency = cur_residency;
+
+	/* delta */
+	if (cur_residency >= prev_residency)
+		delta = cur_residency - prev_residency;
+	else
+		delta = cur_residency + (overflow_residency - prev_residency);
+
+	/* Add delta to extended raw driver copy of idle residency */
+	cur_residency = gtidle->cur_residency + delta;
+	gtidle->cur_residency = cur_residency;
+
+	/* residency multiplier in ns, convert to ms */
+	cur_residency = mul_u64_u32_div(cur_residency, gtidle->residency_multiplier, 1e6);
+
+	return cur_residency;
+}
+
+static ssize_t name_show(struct device *dev,
+			 struct device_attribute *attr, char *buff)
+{
+	struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
+
+	return sysfs_emit(buff, "%s\n", gtidle->name);
+}
+static DEVICE_ATTR_RO(name);
+
+static ssize_t idle_status_show(struct device *dev,
+				struct device_attribute *attr, char *buff)
+{
+	struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
+	struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
+	enum xe_gt_idle_state state;
+
+	state = gtidle->idle_status(pc);
+
+	return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state));
+}
+static DEVICE_ATTR_RO(idle_status);
+
+static ssize_t idle_residency_ms_show(struct device *dev,
+				      struct device_attribute *attr, char *buff)
+{
+	struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
+	struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
+	u64 residency;
+
+	residency = gtidle->idle_residency(pc);
+	return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency));
+}
+static DEVICE_ATTR_RO(idle_residency_ms);
+
+static const struct attribute *gt_idle_attrs[] = {
+	&dev_attr_name.attr,
+	&dev_attr_idle_status.attr,
+	&dev_attr_idle_residency_ms.attr,
+	NULL,
+};
+
+static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg;
+
+	sysfs_remove_files(kobj, gt_idle_attrs);
+	kobject_put(kobj);
+}
+
+void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
+{
+	struct xe_gt *gt = gtidle_to_gt(gtidle);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct kobject *kobj;
+	int err;
+
+	kobj = kobject_create_and_add("gtidle", gt->sysfs);
+	if (!kobj) {
+		drm_warn(&xe->drm, "%s failed, err: %d\n", __func__, -ENOMEM);
+		return;
+	}
+
+	if (xe_gt_is_media_type(gt)) {
+		sprintf(gtidle->name, "gt%d-mc\n", gt->info.id);
+		gtidle->idle_residency = xe_guc_pc_mc6_residency;
+	} else {
+		sprintf(gtidle->name, "gt%d-rc\n", gt->info.id);
+		gtidle->idle_residency = xe_guc_pc_rc6_residency;
+	}
+
+	/* Multiplier for Residency counter in units of 1.28us */
+	gtidle->residency_multiplier = 1280;
+	gtidle->idle_status = xe_guc_pc_c_status;
+
+	err = sysfs_create_files(kobj, gt_idle_attrs);
+	if (err) {
+		kobject_put(kobj);
+		drm_warn(&xe->drm, "failed to register gtidle sysfs, err: %d\n", err);
+		return;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj);
+	if (err)
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+}
+
+void xe_gt_idle_enable_c6(struct xe_gt *gt)
+{
+	xe_device_assert_mem_access(gt_to_xe(gt));
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	/* Units of 1280 ns for a total of 5s */
+	xe_mmio_write32(gt, RC_IDLE_HYSTERSIS, 0x3B9ACA);
+	/* Enable RC6 */
+	xe_mmio_write32(gt, RC_CONTROL,
+			RC_CTL_HW_ENABLE | RC_CTL_TO_MODE | RC_CTL_RC6_ENABLE);
+}
+
+void xe_gt_idle_disable_c6(struct xe_gt *gt)
+{
+	xe_device_assert_mem_access(gt_to_xe(gt));
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+
+	xe_mmio_write32(gt, PG_ENABLE, 0);
+	xe_mmio_write32(gt, RC_CONTROL, 0);
+	xe_mmio_write32(gt, RC_STATE, 0);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h
new file mode 100644
index 000000000000..69280fd16b03
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_idle.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_IDLE_H_
+#define _XE_GT_IDLE_H_
+
+#include "xe_gt_idle_types.h"
+
+struct xe_gt;
+
+void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
+void xe_gt_idle_enable_c6(struct xe_gt *gt);
+void xe_gt_idle_disable_c6(struct xe_gt *gt);
+
+#endif /* _XE_GT_IDLE_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h
new file mode 100644
index 000000000000..f99b447534f3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_IDLE_SYSFS_TYPES_H_
+#define _XE_GT_IDLE_SYSFS_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_guc_pc;
+
+/* States of GT Idle */
+enum xe_gt_idle_state {
+	GT_IDLE_C0,
+	GT_IDLE_C6,
+	GT_IDLE_UNKNOWN,
+};
+
+/**
+ * struct xe_gt_idle - A struct that contains idle properties based of gt
+ */
+struct xe_gt_idle {
+	/** @name: name */
+	char name[16];
+	/** @residency_multiplier: residency multiplier in ns */
+	u32 residency_multiplier;
+	/** @cur_residency: raw driver copy of idle residency */
+	u64 cur_residency;
+	/** @prev_residency: previous residency counter */
+	u64 prev_residency;
+	/** @idle_status: get the current idle state */
+	enum xe_gt_idle_state (*idle_status)(struct xe_guc_pc *pc);
+	/** @idle_residency: get idle residency counter */
+	u64 (*idle_residency)(struct xe_guc_pc *pc);
+};
+
+#endif /* _XE_GT_IDLE_SYSFS_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
new file mode 100644
index 000000000000..77925b35cf8d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -0,0 +1,685 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_mcr.h"
+
+#include "regs/xe_gt_regs.h"
+#include "xe_gt.h"
+#include "xe_gt_topology.h"
+#include "xe_gt_types.h"
+#include "xe_mmio.h"
+
+/**
+ * DOC: GT Multicast/Replicated (MCR) Register Support
+ *
+ * Some GT registers are designed as "multicast" or "replicated" registers:
+ * multiple instances of the same register share a single MMIO offset.  MCR
+ * registers are generally used when the hardware needs to potentially track
+ * independent values of a register per hardware unit (e.g., per-subslice,
+ * per-L3bank, etc.).  The specific types of replication that exist vary
+ * per-platform.
+ *
+ * MMIO accesses to MCR registers are controlled according to the settings
+ * programmed in the platform's MCR_SELECTOR register(s).  MMIO writes to MCR
+ * registers can be done in either multicast (a single write updates all
+ * instances of the register to the same value) or unicast (a write updates only
+ * one specific instance) form.  Reads of MCR registers always operate in a
+ * unicast manner regardless of how the multicast/unicast bit is set in
+ * MCR_SELECTOR.  Selection of a specific MCR instance for unicast operations is
+ * referred to as "steering."
+ *
+ * If MCR register operations are steered toward a hardware unit that is
+ * fused off or currently powered down due to power gating, the MMIO operation
+ * is "terminated" by the hardware.  Terminated read operations will return a
+ * value of zero and terminated unicast write operations will be silently
+ * ignored. During device initialization, the goal of the various
+ * ``init_steering_*()`` functions is to apply the platform-specific rules for
+ * each MCR register type to identify a steering target that will select a
+ * non-terminated instance.
+ */
+
+#define STEER_SEMAPHORE		XE_REG(0xFD0)
+
+static inline struct xe_reg to_xe_reg(struct xe_reg_mcr reg_mcr)
+{
+	return reg_mcr.__reg;
+}
+
+enum {
+	MCR_OP_READ,
+	MCR_OP_WRITE
+};
+
+static const struct xe_mmio_range xelp_l3bank_steering_table[] = {
+	{ 0x00B100, 0x00B3FF },
+	{},
+};
+
+static const struct xe_mmio_range xehp_l3bank_steering_table[] = {
+	{ 0x008C80, 0x008CFF },
+	{ 0x00B100, 0x00B3FF },
+	{},
+};
+
+/*
+ * Although the bspec lists more "MSLICE" ranges than shown here, some of those
+ * are of a "GAM" subclass that has special rules and doesn't need to be
+ * included here.
+ */
+static const struct xe_mmio_range xehp_mslice_steering_table[] = {
+	{ 0x00DD00, 0x00DDFF },
+	{ 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */
+	{},
+};
+
+static const struct xe_mmio_range xehp_lncf_steering_table[] = {
+	{ 0x00B000, 0x00B0FF },
+	{ 0x00D880, 0x00D8FF },
+	{},
+};
+
+/*
+ * We have several types of MCR registers where steering to (0,0) will always
+ * provide us with a non-terminated value.  We'll stick them all in the same
+ * table for simplicity.
+ */
+static const struct xe_mmio_range xehpc_instance0_steering_table[] = {
+	{ 0x004000, 0x004AFF },		/* HALF-BSLICE */
+	{ 0x008800, 0x00887F },		/* CC */
+	{ 0x008A80, 0x008AFF },		/* TILEPSMI */
+	{ 0x00B000, 0x00B0FF },		/* HALF-BSLICE */
+	{ 0x00B100, 0x00B3FF },		/* L3BANK */
+	{ 0x00C800, 0x00CFFF },		/* HALF-BSLICE */
+	{ 0x00D800, 0x00D8FF },		/* HALF-BSLICE */
+	{ 0x00DD00, 0x00DDFF },		/* BSLICE */
+	{ 0x00E900, 0x00E9FF },		/* HALF-BSLICE */
+	{ 0x00EC00, 0x00EEFF },		/* HALF-BSLICE */
+	{ 0x00F000, 0x00FFFF },		/* HALF-BSLICE */
+	{ 0x024180, 0x0241FF },		/* HALF-BSLICE */
+	{},
+};
+
+static const struct xe_mmio_range xelpg_instance0_steering_table[] = {
+	{ 0x000B00, 0x000BFF },         /* SQIDI */
+	{ 0x001000, 0x001FFF },         /* SQIDI */
+	{ 0x004000, 0x0048FF },         /* GAM */
+	{ 0x008700, 0x0087FF },         /* SQIDI */
+	{ 0x00B000, 0x00B0FF },         /* NODE */
+	{ 0x00C800, 0x00CFFF },         /* GAM */
+	{ 0x00D880, 0x00D8FF },         /* NODE */
+	{ 0x00DD00, 0x00DDFF },         /* OAAL2 */
+	{},
+};
+
+static const struct xe_mmio_range xelpg_l3bank_steering_table[] = {
+	{ 0x00B100, 0x00B3FF },
+	{},
+};
+
+static const struct xe_mmio_range xelp_dss_steering_table[] = {
+	{ 0x008150, 0x00815F },
+	{ 0x009520, 0x00955F },
+	{ 0x00DE80, 0x00E8FF },
+	{ 0x024A00, 0x024A7F },
+	{},
+};
+
+/* DSS steering is used for GSLICE ranges as well */
+static const struct xe_mmio_range xehp_dss_steering_table[] = {
+	{ 0x005200, 0x0052FF },		/* GSLICE */
+	{ 0x005400, 0x007FFF },		/* GSLICE */
+	{ 0x008140, 0x00815F },		/* GSLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+	{ 0x008D00, 0x008DFF },		/* DSS */
+	{ 0x0094D0, 0x00955F },		/* GSLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },		/* DSS */
+	{ 0x00D800, 0x00D87F },		/* GSLICE */
+	{ 0x00DC00, 0x00DCFF },		/* GSLICE */
+	{ 0x00DE80, 0x00E8FF },		/* DSS (0xE000-0xE0FF reserved ) */
+	{ 0x017000, 0x017FFF },		/* GSLICE */
+	{ 0x024A00, 0x024A7F },		/* DSS */
+	{},
+};
+
+/* DSS steering is used for COMPUTE ranges as well */
+static const struct xe_mmio_range xehpc_dss_steering_table[] = {
+	{ 0x008140, 0x00817F },		/* COMPUTE (0x8140-0x814F & 0x8160-0x817F), DSS (0x8150-0x815F) */
+	{ 0x0094D0, 0x00955F },		/* COMPUTE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },		/* DSS */
+	{ 0x00DC00, 0x00DCFF },		/* COMPUTE */
+	{ 0x00DE80, 0x00E7FF },		/* DSS (0xDF00-0xE1FF reserved ) */
+	{},
+};
+
+/* DSS steering is used for SLICE ranges as well */
+static const struct xe_mmio_range xelpg_dss_steering_table[] = {
+	{ 0x005200, 0x0052FF },		/* SLICE */
+	{ 0x005500, 0x007FFF },		/* SLICE */
+	{ 0x008140, 0x00815F },		/* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+	{ 0x0094D0, 0x00955F },		/* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },		/* DSS */
+	{ 0x00D800, 0x00D87F },		/* SLICE */
+	{ 0x00DC00, 0x00DCFF },		/* SLICE */
+	{ 0x00DE80, 0x00E8FF },		/* DSS (0xE000-0xE0FF reserved) */
+	{},
+};
+
+static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = {
+	{ 0x393200, 0x39323F },
+	{ 0x393400, 0x3934FF },
+	{},
+};
+
+static const struct xe_mmio_range dg2_implicit_steering_table[] = {
+	{ 0x000B00, 0x000BFF },		/* SF (SQIDI replication) */
+	{ 0x001000, 0x001FFF },		/* SF (SQIDI replication) */
+	{ 0x004000, 0x004AFF },		/* GAM (MSLICE replication) */
+	{ 0x008700, 0x0087FF },		/* MCFG (SQIDI replication) */
+	{ 0x00C800, 0x00CFFF },		/* GAM (MSLICE replication) */
+	{ 0x00F000, 0x00FFFF },		/* GAM (MSLICE replication) */
+	{},
+};
+
+static const struct xe_mmio_range xe2lpg_dss_steering_table[] = {
+	{ 0x005200, 0x0052FF },         /* SLICE */
+	{ 0x005500, 0x007FFF },         /* SLICE */
+	{ 0x008140, 0x00815F },         /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+	{ 0x0094D0, 0x00955F },         /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+	{ 0x009680, 0x0096FF },         /* DSS */
+	{ 0x00D800, 0x00D87F },         /* SLICE */
+	{ 0x00DC00, 0x00DCFF },         /* SLICE */
+	{ 0x00DE80, 0x00E8FF },         /* DSS (0xE000-0xE0FF reserved) */
+	{ 0x00E980, 0x00E9FF },         /* SLICE */
+	{ 0x013000, 0x0133FF },         /* DSS (0x13000-0x131FF), SLICE (0x13200-0x133FF) */
+	{},
+};
+
+static const struct xe_mmio_range xe2lpg_sqidi_psmi_steering_table[] = {
+	{ 0x000B00, 0x000BFF },
+	{ 0x001000, 0x001FFF },
+	{},
+};
+
+static const struct xe_mmio_range xe2lpg_instance0_steering_table[] = {
+	{ 0x004000, 0x004AFF },         /* GAM, rsvd, GAMWKR */
+	{ 0x008700, 0x00887F },         /* SQIDI, MEMPIPE */
+	{ 0x00B000, 0x00B3FF },         /* NODE, L3BANK */
+	{ 0x00C800, 0x00CFFF },         /* GAM */
+	{ 0x00D880, 0x00D8FF },         /* NODE */
+	{ 0x00DD00, 0x00DDFF },         /* MEMPIPE */
+	{ 0x00E900, 0x00E97F },         /* MEMPIPE */
+	{ 0x00F000, 0x00FFFF },         /* GAM, GAMWKR */
+	{ 0x013400, 0x0135FF },         /* MEMPIPE */
+	{},
+};
+
+static const struct xe_mmio_range xe2lpm_gpmxmt_steering_table[] = {
+	{ 0x388160, 0x38817F },
+	{ 0x389480, 0x3894CF },
+	{},
+};
+
+static const struct xe_mmio_range xe2lpm_instance0_steering_table[] = {
+	{ 0x384000, 0x3847DF },         /* GAM, rsvd, GAM */
+	{ 0x384900, 0x384AFF },         /* GAM */
+	{ 0x389560, 0x3895FF },         /* MEDIAINF */
+	{ 0x38B600, 0x38B8FF },         /* L3BANK */
+	{ 0x38C800, 0x38D07F },         /* GAM, MEDIAINF */
+	{ 0x38F000, 0x38F0FF },         /* GAM */
+	{ 0x393C00, 0x393C7F },         /* MEDIAINF */
+	{},
+};
+
+static void init_steering_l3bank(struct xe_gt *gt)
+{
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+		u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
+						xe_mmio_read32(gt, MIRROR_FUSE3));
+		u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK,
+					      xe_mmio_read32(gt, XEHP_FUSE4));
+
+		/*
+		 * Group selects mslice, instance selects bank within mslice.
+		 * Bank 0 is always valid _except_ when the bank mask is 010b.
+		 */
+		gt->steering[L3BANK].group_target = __ffs(mslice_mask);
+		gt->steering[L3BANK].instance_target =
+			bank_mask & BIT(0) ? 0 : 2;
+	} else if (gt_to_xe(gt)->info.platform == XE_DG2) {
+		u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
+						xe_mmio_read32(gt, MIRROR_FUSE3));
+		u32 bank = __ffs(mslice_mask) * 8;
+
+		/*
+		 * Like mslice registers, look for a valid mslice and steer to
+		 * the first L3BANK of that quad. Access to the Nth L3 bank is
+		 * split between the first bits of group and instance
+		 */
+		gt->steering[L3BANK].group_target = (bank >> 2) & 0x7;
+		gt->steering[L3BANK].instance_target = bank & 0x3;
+	} else {
+		u32 fuse = REG_FIELD_GET(L3BANK_MASK,
+					 ~xe_mmio_read32(gt, MIRROR_FUSE3));
+
+		gt->steering[L3BANK].group_target = 0;	/* unused */
+		gt->steering[L3BANK].instance_target = __ffs(fuse);
+	}
+}
+
+static void init_steering_mslice(struct xe_gt *gt)
+{
+	u32 mask = REG_FIELD_GET(MEML3_EN_MASK,
+				 xe_mmio_read32(gt, MIRROR_FUSE3));
+
+	/*
+	 * mslice registers are valid (not terminated) if either the meml3
+	 * associated with the mslice is present, or at least one DSS associated
+	 * with the mslice is present.  There will always be at least one meml3
+	 * so we can just use that to find a non-terminated mslice and ignore
+	 * the DSS fusing.
+	 */
+	gt->steering[MSLICE].group_target = __ffs(mask);
+	gt->steering[MSLICE].instance_target = 0;	/* unused */
+
+	/*
+	 * LNCF termination is also based on mslice presence, so we'll set
+	 * it up here.  Either LNCF within a non-terminated mslice will work,
+	 * so we just always pick LNCF 0 here.
+	 */
+	gt->steering[LNCF].group_target = __ffs(mask) << 1;
+	gt->steering[LNCF].instance_target = 0;		/* unused */
+}
+
+static void init_steering_dss(struct xe_gt *gt)
+{
+	unsigned int dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0),
+			       xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0));
+	unsigned int dss_per_grp = gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4;
+
+	gt->steering[DSS].group_target = dss / dss_per_grp;
+	gt->steering[DSS].instance_target = dss % dss_per_grp;
+}
+
+static void init_steering_oaddrm(struct xe_gt *gt)
+{
+	/*
+	 * First instance is only terminated if the entire first media slice
+	 * is absent (i.e., no VCS0 or VECS0).
+	 */
+	if (gt->info.engine_mask & (XE_HW_ENGINE_VCS0 | XE_HW_ENGINE_VECS0))
+		gt->steering[OADDRM].group_target = 0;
+	else
+		gt->steering[OADDRM].group_target = 1;
+
+	gt->steering[DSS].instance_target = 0;		/* unused */
+}
+
+static void init_steering_sqidi_psmi(struct xe_gt *gt)
+{
+	u32 mask = REG_FIELD_GET(XE2_NODE_ENABLE_MASK,
+				 xe_mmio_read32(gt, MIRROR_FUSE3));
+	u32 select = __ffs(mask);
+
+	gt->steering[SQIDI_PSMI].group_target = select >> 1;
+	gt->steering[SQIDI_PSMI].instance_target = select & 0x1;
+}
+
+static void init_steering_inst0(struct xe_gt *gt)
+{
+	gt->steering[DSS].group_target = 0;		/* unused */
+	gt->steering[DSS].instance_target = 0;		/* unused */
+}
+
+static const struct {
+	const char *name;
+	void (*init)(struct xe_gt *gt);
+} xe_steering_types[] = {
+	[L3BANK] =	{ "L3BANK",	init_steering_l3bank },
+	[MSLICE] =	{ "MSLICE",	init_steering_mslice },
+	[LNCF] =	{ "LNCF",	NULL }, /* initialized by mslice init */
+	[DSS] =		{ "DSS",	init_steering_dss },
+	[OADDRM] =	{ "OADDRM / GPMXMT", init_steering_oaddrm },
+	[SQIDI_PSMI] =  { "SQIDI_PSMI", init_steering_sqidi_psmi },
+	[INSTANCE0] =	{ "INSTANCE 0",	init_steering_inst0 },
+	[IMPLICIT_STEERING] = { "IMPLICIT", NULL },
+};
+
+void xe_gt_mcr_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES);
+	BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES);
+
+	spin_lock_init(&gt->mcr_lock);
+
+	if (gt->info.type == XE_GT_TYPE_MEDIA) {
+		drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);
+
+		if (MEDIA_VER(xe) >= 20) {
+			gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table;
+			gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table;
+		} else {
+			gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table;
+		}
+	} else {
+		if (GRAPHICS_VER(xe) >= 20) {
+			gt->steering[DSS].ranges = xe2lpg_dss_steering_table;
+			gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table;
+			gt->steering[INSTANCE0].ranges = xe2lpg_instance0_steering_table;
+		} else if (GRAPHICS_VERx100(xe) >= 1270) {
+			gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table;
+			gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table;
+			gt->steering[DSS].ranges = xelpg_dss_steering_table;
+		} else if (xe->info.platform == XE_PVC) {
+			gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table;
+			gt->steering[DSS].ranges = xehpc_dss_steering_table;
+		} else if (xe->info.platform == XE_DG2) {
+			gt->steering[L3BANK].ranges = xehp_l3bank_steering_table;
+			gt->steering[MSLICE].ranges = xehp_mslice_steering_table;
+			gt->steering[LNCF].ranges = xehp_lncf_steering_table;
+			gt->steering[DSS].ranges = xehp_dss_steering_table;
+			gt->steering[IMPLICIT_STEERING].ranges = dg2_implicit_steering_table;
+		} else {
+			gt->steering[L3BANK].ranges = xelp_l3bank_steering_table;
+			gt->steering[DSS].ranges = xelp_dss_steering_table;
+		}
+	}
+
+	/* Select non-terminated steering target for each type */
+	for (int i = 0; i < NUM_STEERING_TYPES; i++)
+		if (gt->steering[i].ranges && xe_steering_types[i].init)
+			xe_steering_types[i].init(gt);
+}
+
+/**
+ * xe_gt_mcr_set_implicit_defaults - Initialize steer control registers
+ * @gt: GT structure
+ *
+ * Some register ranges don't need to have their steering control registers
+ * changed on each access - it's sufficient to set them once on initialization.
+ * This function sets those registers for each platform *
+ */
+void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (xe->info.platform == XE_DG2) {
+		u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) |
+			REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2);
+
+		xe_mmio_write32(gt, MCFG_MCR_SELECTOR, steer_val);
+		xe_mmio_write32(gt, SF_MCR_SELECTOR, steer_val);
+		/*
+		 * For GAM registers, all reads should be directed to instance 1
+		 * (unicast reads against other instances are not allowed),
+		 * and instance 1 is already the hardware's default steering
+		 * target, which we never change
+		 */
+	}
+}
+
+/*
+ * xe_gt_mcr_get_nonterminated_steering - find group/instance values that
+ *    will steer a register to a non-terminated instance
+ * @gt: GT structure
+ * @reg: register for which the steering is required
+ * @group: return variable for group steering
+ * @instance: return variable for instance steering
+ *
+ * This function returns a group/instance pair that is guaranteed to work for
+ * read steering of the given register. Note that a value will be returned even
+ * if the register is not replicated and therefore does not actually require
+ * steering.
+ *
+ * Returns true if the caller should steer to the @group/@instance values
+ * returned.  Returns false if the caller need not perform any steering
+ */
+static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+						 struct xe_reg_mcr reg_mcr,
+						 u8 *group, u8 *instance)
+{
+	const struct xe_reg reg = to_xe_reg(reg_mcr);
+	const struct xe_mmio_range *implicit_ranges;
+
+	for (int type = 0; type < IMPLICIT_STEERING; type++) {
+		if (!gt->steering[type].ranges)
+			continue;
+
+		for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) {
+			if (xe_mmio_in_range(gt, &gt->steering[type].ranges[i], reg)) {
+				*group = gt->steering[type].group_target;
+				*instance = gt->steering[type].instance_target;
+				return true;
+			}
+		}
+	}
+
+	implicit_ranges = gt->steering[IMPLICIT_STEERING].ranges;
+	if (implicit_ranges)
+		for (int i = 0; implicit_ranges[i].end > 0; i++)
+			if (xe_mmio_in_range(gt, &implicit_ranges[i], reg))
+				return false;
+
+	/*
+	 * Not found in a steering table and not a register with implicit
+	 * steering. Just steer to 0/0 as a guess and raise a warning.
+	 */
+	drm_WARN(&gt_to_xe(gt)->drm, true,
+		 "Did not find MCR register %#x in any MCR steering table\n",
+		 reg.addr);
+	*group = 0;
+	*instance = 0;
+
+	return true;
+}
+
+/*
+ * Obtain exclusive access to MCR steering.  On MTL and beyond we also need
+ * to synchronize with external clients (e.g., firmware), so a semaphore
+ * register will also need to be taken.
+ */
+static void mcr_lock(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int ret = 0;
+
+	spin_lock(&gt->mcr_lock);
+
+	/*
+	 * Starting with MTL we also need to grab a semaphore register
+	 * to synchronize with external agents (e.g., firmware) that now
+	 * shares the same steering control register. The semaphore is obtained
+	 * when a read to the relevant register returns 1.
+	 */
+	if (GRAPHICS_VERx100(xe) >= 1270)
+		ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL,
+				     true);
+
+	drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
+}
+
+static void mcr_unlock(struct xe_gt *gt)
+{
+	/* Release hardware semaphore - this is done by writing 1 to the register */
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
+		xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1);
+
+	spin_unlock(&gt->mcr_lock);
+}
+
+/*
+ * Access a register with specific MCR steering
+ *
+ * Caller needs to make sure the relevant forcewake wells are up.
+ */
+static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
+				u8 rw_flag, int group, int instance, u32 value)
+{
+	const struct xe_reg reg = to_xe_reg(reg_mcr);
+	struct xe_reg steer_reg;
+	u32 steer_val, val = 0;
+
+	lockdep_assert_held(&gt->mcr_lock);
+
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+		steer_reg = MTL_MCR_SELECTOR;
+		steer_val = REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
+			REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance);
+	} else {
+		steer_reg = MCR_SELECTOR;
+		steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, group) |
+			REG_FIELD_PREP(MCR_SUBSLICE_MASK, instance);
+	}
+
+	/*
+	 * Always leave the hardware in multicast mode when doing reads and only
+	 * change it to unicast mode when doing writes of a specific instance.
+	 *
+	 * The setting of the multicast/unicast bit usually wouldn't matter for
+	 * read operations (which always return the value from a single register
+	 * instance regardless of how that bit is set), but some platforms may
+	 * have workarounds requiring us to remain in multicast mode for reads,
+	 * e.g. Wa_22013088509 on PVC.  There's no real downside to this, so
+	 * we'll just go ahead and do so on all platforms; we'll only clear the
+	 * multicast bit from the mask when explicitly doing a write operation.
+	 *
+	 * No need to save old steering reg value.
+	 */
+	if (rw_flag == MCR_OP_READ)
+		steer_val |= MCR_MULTICAST;
+
+	xe_mmio_write32(gt, steer_reg, steer_val);
+
+	if (rw_flag == MCR_OP_READ)
+		val = xe_mmio_read32(gt, reg);
+	else
+		xe_mmio_write32(gt, reg, value);
+
+	/*
+	 * If we turned off the multicast bit (during a write) we're required
+	 * to turn it back on before finishing.  The group and instance values
+	 * don't matter since they'll be re-programmed on the next MCR
+	 * operation.
+	 */
+	if (rw_flag == MCR_OP_WRITE)
+		xe_mmio_write32(gt, steer_reg, MCR_MULTICAST);
+
+	return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_read_any - reads a non-terminated instance of an MCR register
+ * @gt: GT structure
+ * @reg_mcr: register to read
+ *
+ * Reads a GT MCR register.  The read will be steered to a non-terminated
+ * instance (i.e., one that isn't fused off or powered down by power gating).
+ * This function assumes the caller is already holding any necessary forcewake
+ * domains.
+ *
+ * Returns the value from a non-terminated instance of @reg.
+ */
+u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr)
+{
+	const struct xe_reg reg = to_xe_reg(reg_mcr);
+	u8 group, instance;
+	u32 val;
+	bool steer;
+
+	steer = xe_gt_mcr_get_nonterminated_steering(gt, reg_mcr,
+						     &group, &instance);
+
+	if (steer) {
+		mcr_lock(gt);
+		val = rw_with_mcr_steering(gt, reg_mcr, MCR_OP_READ,
+					   group, instance, 0);
+		mcr_unlock(gt);
+	} else {
+		val = xe_mmio_read32(gt, reg);
+	}
+
+	return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_read - read a specific instance of an MCR register
+ * @gt: GT structure
+ * @reg_mcr: the MCR register to read
+ * @group: the MCR group
+ * @instance: the MCR instance
+ *
+ * Returns the value read from an MCR register after steering toward a specific
+ * group/instance.
+ */
+u32 xe_gt_mcr_unicast_read(struct xe_gt *gt,
+			   struct xe_reg_mcr reg_mcr,
+			   int group, int instance)
+{
+	u32 val;
+
+	mcr_lock(gt);
+	val = rw_with_mcr_steering(gt, reg_mcr, MCR_OP_READ, group, instance, 0);
+	mcr_unlock(gt);
+
+	return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_write - write a specific instance of an MCR register
+ * @gt: GT structure
+ * @reg_mcr: the MCR register to write
+ * @value: value to write
+ * @group: the MCR group
+ * @instance: the MCR instance
+ *
+ * Write an MCR register in unicast mode after steering toward a specific
+ * group/instance.
+ */
+void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
+			     u32 value, int group, int instance)
+{
+	mcr_lock(gt);
+	rw_with_mcr_steering(gt, reg_mcr, MCR_OP_WRITE, group, instance, value);
+	mcr_unlock(gt);
+}
+
+/**
+ * xe_gt_mcr_multicast_write - write a value to all instances of an MCR register
+ * @gt: GT structure
+ * @reg_mcr: the MCR register to write
+ * @value: value to write
+ *
+ * Write an MCR register in multicast mode to update all instances.
+ */
+void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
+			       u32 value)
+{
+	struct xe_reg reg = to_xe_reg(reg_mcr);
+
+	/*
+	 * Synchronize with any unicast operations.  Once we have exclusive
+	 * access, the MULTICAST bit should already be set, so there's no need
+	 * to touch the steering register.
+	 */
+	mcr_lock(gt);
+	xe_mmio_write32(gt, reg, value);
+	mcr_unlock(gt);
+}
+
+void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	for (int i = 0; i < NUM_STEERING_TYPES; i++) {
+		if (gt->steering[i].ranges) {
+			drm_printf(p, "%s steering: group=%#x, instance=%#x\n",
+				   xe_steering_types[i].name,
+				   gt->steering[i].group_target,
+				   gt->steering[i].instance_target);
+			for (int j = 0; gt->steering[i].ranges[j].end; j++)
+				drm_printf(p, "\t0x%06x - 0x%06x\n",
+					   gt->steering[i].ranges[j].start,
+					   gt->steering[i].ranges[j].end);
+		}
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
new file mode 100644
index 000000000000..27ca1bc880a0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_MCR_H_
+#define _XE_GT_MCR_H_
+
+#include "regs/xe_reg_defs.h"
+
+struct drm_printer;
+struct xe_gt;
+
+void xe_gt_mcr_init(struct xe_gt *gt);
+
+void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt);
+
+u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
+			   int group, int instance);
+u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr mcr_reg);
+
+void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
+			     u32 value, int group, int instance);
+void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
+			       u32 value);
+
+void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
+
+#endif /* _XE_GT_MCR_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
new file mode 100644
index 000000000000..59a70d2e0a7a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -0,0 +1,646 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_pagefault.h"
+
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+
+#include "abi/guc_actions_abi.h"
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_migrate.h"
+#include "xe_pt.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+struct pagefault {
+	u64 page_addr;
+	u32 asid;
+	u16 pdata;
+	u8 vfid;
+	u8 access_type;
+	u8 fault_type;
+	u8 fault_level;
+	u8 engine_class;
+	u8 engine_instance;
+	u8 fault_unsuccessful;
+	bool trva_fault;
+};
+
+enum access_type {
+	ACCESS_TYPE_READ = 0,
+	ACCESS_TYPE_WRITE = 1,
+	ACCESS_TYPE_ATOMIC = 2,
+	ACCESS_TYPE_RESERVED = 3,
+};
+
+enum fault_type {
+	NOT_PRESENT = 0,
+	WRITE_ACCESS_VIOLATION = 1,
+	ATOMIC_ACCESS_VIOLATION = 2,
+};
+
+struct acc {
+	u64 va_range_base;
+	u32 asid;
+	u32 sub_granularity;
+	u8 granularity;
+	u8 vfid;
+	u8 access_type;
+	u8 engine_class;
+	u8 engine_instance;
+};
+
+static bool access_is_atomic(enum access_type access_type)
+{
+	return access_type == ACCESS_TYPE_ATOMIC;
+}
+
+static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma)
+{
+	return BIT(tile->id) & vma->tile_present &&
+		!(BIT(tile->id) & vma->usm.tile_invalidated);
+}
+
+static bool vma_matches(struct xe_vma *vma, u64 page_addr)
+{
+	if (page_addr > xe_vma_end(vma) - 1 ||
+	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
+		return false;
+
+	return true;
+}
+
+static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
+{
+	struct xe_vma *vma = NULL;
+
+	if (vm->usm.last_fault_vma) {   /* Fast lookup */
+		if (vma_matches(vm->usm.last_fault_vma, page_addr))
+			vma = vm->usm.last_fault_vma;
+	}
+	if (!vma)
+		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
+
+	return vma;
+}
+
+static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
+		       bool atomic, unsigned int id)
+{
+	struct xe_bo *bo = xe_vma_bo(vma);
+	struct xe_vm *vm = xe_vma_vm(vma);
+	unsigned int num_shared = 2; /* slots for bind + move */
+	int err;
+
+	err = xe_vm_prepare_vma(exec, vma, num_shared);
+	if (err)
+		return err;
+
+	if (atomic && IS_DGFX(vm->xe)) {
+		if (xe_vma_is_userptr(vma)) {
+			err = -EACCES;
+			return err;
+		}
+
+		/* Migrate to VRAM, move should invalidate the VMA first */
+		err = xe_bo_migrate(bo, XE_PL_VRAM0 + id);
+		if (err)
+			return err;
+	} else if (bo) {
+		/* Create backing store if needed */
+		err = xe_bo_validate(bo, vm, true);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct drm_exec exec;
+	struct xe_vm *vm;
+	struct xe_vma *vma = NULL;
+	struct dma_fence *fence;
+	bool write_locked;
+	int ret = 0;
+	bool atomic;
+
+	/* SW isn't expected to handle TRTT faults */
+	if (pf->trva_fault)
+		return -EFAULT;
+
+	/* ASID to VM */
+	mutex_lock(&xe->usm.lock);
+	vm = xa_load(&xe->usm.asid_to_vm, pf->asid);
+	if (vm)
+		xe_vm_get(vm);
+	mutex_unlock(&xe->usm.lock);
+	if (!vm || !xe_vm_in_fault_mode(vm))
+		return -EINVAL;
+
+retry_userptr:
+	/*
+	 * TODO: Avoid exclusive lock if VM doesn't have userptrs, or
+	 * start out read-locked?
+	 */
+	down_write(&vm->lock);
+	write_locked = true;
+	vma = lookup_vma(vm, pf->page_addr);
+	if (!vma) {
+		ret = -EINVAL;
+		goto unlock_vm;
+	}
+
+	if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) {
+		downgrade_write(&vm->lock);
+		write_locked = false;
+	}
+
+	trace_xe_vma_pagefault(vma);
+
+	atomic = access_is_atomic(pf->access_type);
+
+	/* Check if VMA is valid */
+	if (vma_is_valid(tile, vma) && !atomic)
+		goto unlock_vm;
+
+	/* TODO: Validate fault */
+
+	if (xe_vma_is_userptr(vma) && write_locked) {
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_del_init(&vma->userptr.invalidate_link);
+		spin_unlock(&vm->userptr.invalidated_lock);
+
+		ret = xe_vma_userptr_pin_pages(vma);
+		if (ret)
+			goto unlock_vm;
+
+		downgrade_write(&vm->lock);
+		write_locked = false;
+	}
+
+	/* Lock VM and BOs dma-resv */
+	drm_exec_init(&exec, 0, 0);
+	drm_exec_until_all_locked(&exec) {
+		ret = xe_pf_begin(&exec, vma, atomic, tile->id);
+		drm_exec_retry_on_contention(&exec);
+		if (ret)
+			goto unlock_dma_resv;
+	}
+
+	/* Bind VMA only to the GT that has faulted */
+	trace_xe_vma_pf_bind(vma);
+	fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0,
+				 vma->tile_present & BIT(tile->id));
+	if (IS_ERR(fence)) {
+		ret = PTR_ERR(fence);
+		goto unlock_dma_resv;
+	}
+
+	/*
+	 * XXX: Should we drop the lock before waiting? This only helps if doing
+	 * GPU binds which is currently only done if we have to wait for more
+	 * than 10ms on a move.
+	 */
+	dma_fence_wait(fence, false);
+	dma_fence_put(fence);
+
+	if (xe_vma_is_userptr(vma))
+		ret = xe_vma_userptr_check_repin(vma);
+	vma->usm.tile_invalidated &= ~BIT(tile->id);
+
+unlock_dma_resv:
+	drm_exec_fini(&exec);
+unlock_vm:
+	if (!ret)
+		vm->usm.last_fault_vma = vma;
+	if (write_locked)
+		up_write(&vm->lock);
+	else
+		up_read(&vm->lock);
+	if (ret == -EAGAIN)
+		goto retry_userptr;
+
+	if (!ret) {
+		ret = xe_gt_tlb_invalidation_vma(gt, NULL, vma);
+		if (ret >= 0)
+			ret = 0;
+	}
+	xe_vm_put(vm);
+
+	return ret;
+}
+
+static int send_pagefault_reply(struct xe_guc *guc,
+				struct xe_guc_pagefault_reply *reply)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_PAGE_FAULT_RES_DESC,
+		reply->dw0,
+		reply->dw1,
+	};
+
+	return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
+{
+	drm_dbg(&xe->drm, "\n\tASID: %d\n"
+		 "\tVFID: %d\n"
+		 "\tPDATA: 0x%04x\n"
+		 "\tFaulted Address: 0x%08x%08x\n"
+		 "\tFaultType: %d\n"
+		 "\tAccessType: %d\n"
+		 "\tFaultLevel: %d\n"
+		 "\tEngineClass: %d\n"
+		 "\tEngineInstance: %d\n",
+		 pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr),
+		 lower_32_bits(pf->page_addr),
+		 pf->fault_type, pf->access_type, pf->fault_level,
+		 pf->engine_class, pf->engine_instance);
+}
+
+#define PF_MSG_LEN_DW	4
+
+static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
+{
+	const struct xe_guc_pagefault_desc *desc;
+	bool ret = false;
+
+	spin_lock_irq(&pf_queue->lock);
+	if (pf_queue->head != pf_queue->tail) {
+		desc = (const struct xe_guc_pagefault_desc *)
+			(pf_queue->data + pf_queue->head);
+
+		pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
+		pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0);
+		pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0);
+		pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0);
+		pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) <<
+			PFD_PDATA_HI_SHIFT;
+		pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0);
+		pf->asid = FIELD_GET(PFD_ASID, desc->dw1);
+		pf->vfid = FIELD_GET(PFD_VFID, desc->dw2);
+		pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2);
+		pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2);
+		pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) <<
+			PFD_VIRTUAL_ADDR_HI_SHIFT;
+		pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
+			PFD_VIRTUAL_ADDR_LO_SHIFT;
+
+		pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
+			PF_QUEUE_NUM_DW;
+		ret = true;
+	}
+	spin_unlock_irq(&pf_queue->lock);
+
+	return ret;
+}
+
+static bool pf_queue_full(struct pf_queue *pf_queue)
+{
+	lockdep_assert_held(&pf_queue->lock);
+
+	return CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <=
+		PF_MSG_LEN_DW;
+}
+
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct pf_queue *pf_queue;
+	unsigned long flags;
+	u32 asid;
+	bool full;
+
+	if (unlikely(len != PF_MSG_LEN_DW))
+		return -EPROTO;
+
+	asid = FIELD_GET(PFD_ASID, msg[1]);
+	pf_queue = &gt->usm.pf_queue[asid % NUM_PF_QUEUE];
+
+	spin_lock_irqsave(&pf_queue->lock, flags);
+	full = pf_queue_full(pf_queue);
+	if (!full) {
+		memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32));
+		pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW;
+		queue_work(gt->usm.pf_wq, &pf_queue->worker);
+	} else {
+		drm_warn(&xe->drm, "PF Queue full, shouldn't be possible");
+	}
+	spin_unlock_irqrestore(&pf_queue->lock, flags);
+
+	return full ? -ENOSPC : 0;
+}
+
+#define USM_QUEUE_MAX_RUNTIME_MS	20
+
+static void pf_queue_work_func(struct work_struct *w)
+{
+	struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
+	struct xe_gt *gt = pf_queue->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_guc_pagefault_reply reply = {};
+	struct pagefault pf = {};
+	unsigned long threshold;
+	int ret;
+
+	threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
+
+	while (get_pagefault(pf_queue, &pf)) {
+		ret = handle_pagefault(gt, &pf);
+		if (unlikely(ret)) {
+			print_pagefault(xe, &pf);
+			pf.fault_unsuccessful = 1;
+			drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
+		}
+
+		reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
+			FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
+			FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
+			FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
+			FIELD_PREP(PFR_ASID, pf.asid);
+
+		reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
+			FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
+			FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
+			FIELD_PREP(PFR_PDATA, pf.pdata);
+
+		send_pagefault_reply(&gt->uc.guc, &reply);
+
+		if (time_after(jiffies, threshold) &&
+		    pf_queue->head != pf_queue->tail) {
+			queue_work(gt->usm.pf_wq, w);
+			break;
+		}
+	}
+}
+
+static void acc_queue_work_func(struct work_struct *w);
+
+int xe_gt_pagefault_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i;
+
+	if (!xe->info.has_usm)
+		return 0;
+
+	for (i = 0; i < NUM_PF_QUEUE; ++i) {
+		gt->usm.pf_queue[i].gt = gt;
+		spin_lock_init(&gt->usm.pf_queue[i].lock);
+		INIT_WORK(&gt->usm.pf_queue[i].worker, pf_queue_work_func);
+	}
+	for (i = 0; i < NUM_ACC_QUEUE; ++i) {
+		gt->usm.acc_queue[i].gt = gt;
+		spin_lock_init(&gt->usm.acc_queue[i].lock);
+		INIT_WORK(&gt->usm.acc_queue[i].worker, acc_queue_work_func);
+	}
+
+	gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue",
+					WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE);
+	if (!gt->usm.pf_wq)
+		return -ENOMEM;
+
+	gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
+					 WQ_UNBOUND | WQ_HIGHPRI,
+					 NUM_ACC_QUEUE);
+	if (!gt->usm.acc_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void xe_gt_pagefault_reset(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i;
+
+	if (!xe->info.has_usm)
+		return;
+
+	for (i = 0; i < NUM_PF_QUEUE; ++i) {
+		spin_lock_irq(&gt->usm.pf_queue[i].lock);
+		gt->usm.pf_queue[i].head = 0;
+		gt->usm.pf_queue[i].tail = 0;
+		spin_unlock_irq(&gt->usm.pf_queue[i].lock);
+	}
+
+	for (i = 0; i < NUM_ACC_QUEUE; ++i) {
+		spin_lock(&gt->usm.acc_queue[i].lock);
+		gt->usm.acc_queue[i].head = 0;
+		gt->usm.acc_queue[i].tail = 0;
+		spin_unlock(&gt->usm.acc_queue[i].lock);
+	}
+}
+
+static int granularity_in_byte(int val)
+{
+	switch (val) {
+	case 0:
+		return SZ_128K;
+	case 1:
+		return SZ_2M;
+	case 2:
+		return SZ_16M;
+	case 3:
+		return SZ_64M;
+	default:
+		return 0;
+	}
+}
+
+static int sub_granularity_in_byte(int val)
+{
+	return (granularity_in_byte(val) / 32);
+}
+
+static void print_acc(struct xe_device *xe, struct acc *acc)
+{
+	drm_warn(&xe->drm, "Access counter request:\n"
+		 "\tType: %s\n"
+		 "\tASID: %d\n"
+		 "\tVFID: %d\n"
+		 "\tEngine: %d:%d\n"
+		 "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n"
+		 "\tSub_Granularity Vector: 0x%08x\n"
+		 "\tVA Range base: 0x%016llx\n",
+		 acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL",
+		 acc->asid, acc->vfid, acc->engine_class, acc->engine_instance,
+		 granularity_in_byte(acc->granularity) / SZ_1K,
+		 sub_granularity_in_byte(acc->granularity) / SZ_1K,
+		 acc->sub_granularity, acc->va_range_base);
+}
+
+static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
+{
+	u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) *
+		sub_granularity_in_byte(acc->granularity);
+
+	return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K);
+}
+
+static int handle_acc(struct xe_gt *gt, struct acc *acc)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct drm_exec exec;
+	struct xe_vm *vm;
+	struct xe_vma *vma;
+	int ret = 0;
+
+	/* We only support ACC_TRIGGER at the moment */
+	if (acc->access_type != ACC_TRIGGER)
+		return -EINVAL;
+
+	/* ASID to VM */
+	mutex_lock(&xe->usm.lock);
+	vm = xa_load(&xe->usm.asid_to_vm, acc->asid);
+	if (vm)
+		xe_vm_get(vm);
+	mutex_unlock(&xe->usm.lock);
+	if (!vm || !xe_vm_in_fault_mode(vm))
+		return -EINVAL;
+
+	down_read(&vm->lock);
+
+	/* Lookup VMA */
+	vma = get_acc_vma(vm, acc);
+	if (!vma) {
+		ret = -EINVAL;
+		goto unlock_vm;
+	}
+
+	trace_xe_vma_acc(vma);
+
+	/* Userptr or null can't be migrated, nothing to do */
+	if (xe_vma_has_no_bo(vma))
+		goto unlock_vm;
+
+	/* Lock VM and BOs dma-resv */
+	drm_exec_init(&exec, 0, 0);
+	drm_exec_until_all_locked(&exec) {
+		ret = xe_pf_begin(&exec, vma, true, tile->id);
+		drm_exec_retry_on_contention(&exec);
+		if (ret)
+			break;
+	}
+
+	drm_exec_fini(&exec);
+unlock_vm:
+	up_read(&vm->lock);
+	xe_vm_put(vm);
+
+	return ret;
+}
+
+#define make_u64(hi__, low__)  ((u64)(hi__) << 32 | (u64)(low__))
+
+#define ACC_MSG_LEN_DW        4
+
+static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
+{
+	const struct xe_guc_acc_desc *desc;
+	bool ret = false;
+
+	spin_lock(&acc_queue->lock);
+	if (acc_queue->head != acc_queue->tail) {
+		desc = (const struct xe_guc_acc_desc *)
+			(acc_queue->data + acc_queue->head);
+
+		acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2);
+		acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 |
+			FIELD_GET(ACC_SUBG_LO, desc->dw0);
+		acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1);
+		acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1);
+		acc->asid =  FIELD_GET(ACC_ASID, desc->dw1);
+		acc->vfid =  FIELD_GET(ACC_VFID, desc->dw2);
+		acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0);
+		acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
+					      desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);
+
+		acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) %
+				  ACC_QUEUE_NUM_DW;
+		ret = true;
+	}
+	spin_unlock(&acc_queue->lock);
+
+	return ret;
+}
+
+static void acc_queue_work_func(struct work_struct *w)
+{
+	struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker);
+	struct xe_gt *gt = acc_queue->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct acc acc = {};
+	unsigned long threshold;
+	int ret;
+
+	threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
+
+	while (get_acc(acc_queue, &acc)) {
+		ret = handle_acc(gt, &acc);
+		if (unlikely(ret)) {
+			print_acc(xe, &acc);
+			drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
+		}
+
+		if (time_after(jiffies, threshold) &&
+		    acc_queue->head != acc_queue->tail) {
+			queue_work(gt->usm.acc_wq, w);
+			break;
+		}
+	}
+}
+
+static bool acc_queue_full(struct acc_queue *acc_queue)
+{
+	lockdep_assert_held(&acc_queue->lock);
+
+	return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <=
+		ACC_MSG_LEN_DW;
+}
+
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct acc_queue *acc_queue;
+	u32 asid;
+	bool full;
+
+	if (unlikely(len != ACC_MSG_LEN_DW))
+		return -EPROTO;
+
+	asid = FIELD_GET(ACC_ASID, msg[1]);
+	acc_queue = &gt->usm.acc_queue[asid % NUM_ACC_QUEUE];
+
+	spin_lock(&acc_queue->lock);
+	full = acc_queue_full(acc_queue);
+	if (!full) {
+		memcpy(acc_queue->data + acc_queue->tail, msg,
+		       len * sizeof(u32));
+		acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW;
+		queue_work(gt->usm.acc_wq, &acc_queue->worker);
+	} else {
+		drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC");
+	}
+	spin_unlock(&acc_queue->lock);
+
+	return full ? -ENOSPC : 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h
new file mode 100644
index 000000000000..839c065a5e4c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_PAGEFAULT_H_
+#define _XE_GT_PAGEFAULT_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+struct xe_guc;
+
+int xe_gt_pagefault_init(struct xe_gt *gt);
+void xe_gt_pagefault_reset(struct xe_gt *gt);
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif	/* _XE_GT_PAGEFAULT_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
new file mode 100644
index 000000000000..5991bcadd47e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_PRINTK_H_
+#define _XE_GT_PRINTK_H_
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+
+#define xe_gt_printk(_gt, _level, _fmt, ...) \
+	drm_##_level(&gt_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
+#define xe_gt_err(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_warn(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), warn, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_notice(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), notice, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_info(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), info, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_dbg(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), dbg, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_err_ratelimited(_gt, _fmt, ...) \
+	xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_WARN(_gt, _condition, _fmt, ...) \
+	drm_WARN(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
+#define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) \
+	drm_WARN_ONCE(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
+#define xe_gt_WARN_ON(_gt, _condition) \
+	xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition))
+
+#define xe_gt_WARN_ON_ONCE(_gt, _condition) \
+	xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
new file mode 100644
index 000000000000..c69d2e8a0fe1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_sysfs.h"
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_gt.h"
+
+static void xe_gt_sysfs_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static const struct kobj_type xe_gt_sysfs_kobj_type = {
+	.release = xe_gt_sysfs_kobj_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void gt_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	kobject_put(gt->sysfs);
+}
+
+void xe_gt_sysfs_init(struct xe_gt *gt)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct kobj_gt *kg;
+	int err;
+
+	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
+	if (!kg)
+		return;
+
+	kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
+	kg->gt = gt;
+
+	err = kobject_add(&kg->base, tile->sysfs, "gt%d", gt->info.id);
+	if (err) {
+		drm_warn(&xe->drm, "failed to add GT sysfs directory, err: %d\n", err);
+		kobject_put(&kg->base);
+		return;
+	}
+
+	gt->sysfs = &kg->base;
+
+	err = drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt);
+	if (err) {
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+		return;
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h
new file mode 100644
index 000000000000..e3ec278ca0be
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_H_
+#define _XE_GT_SYSFS_H_
+
+#include "xe_gt_sysfs_types.h"
+
+void xe_gt_sysfs_init(struct xe_gt *gt);
+
+static inline struct xe_gt *
+kobj_to_gt(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_gt, base)->gt;
+}
+
+#endif /* _XE_GT_SYSFS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs_types.h b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h
new file mode 100644
index 000000000000..d3bc6b83360f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_TYPES_H_
+#define _XE_GT_SYSFS_TYPES_H_
+
+#include <linux/kobject.h>
+
+struct xe_gt;
+
+/**
+ * struct kobj_gt - A GT's kobject struct that connects the kobject and the GT
+ *
+ * When dealing with multiple GTs, this struct helps to understand which GT
+ * needs to be addressed on a given sysfs call.
+ */
+struct kobj_gt {
+	/** @base: The actual kobject */
+	struct kobject base;
+	/** @gt: A pointer to the GT itself */
+	struct xe_gt *gt;
+};
+
+#endif	/* _XE_GT_SYSFS_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
new file mode 100644
index 000000000000..63d640591a52
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include <regs/xe_gt_regs.h>
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_sysfs.h"
+#include "xe_gt_throttle_sysfs.h"
+#include "xe_mmio.h"
+
+/**
+ * DOC: Xe GT Throttle
+ *
+ * Provides sysfs entries for frequency throttle reasons in GT
+ *
+ * device/gt#/freq0/throttle/status - Overall status
+ * device/gt#/freq0/throttle/reason_pl1 - Frequency throttle due to PL1
+ * device/gt#/freq0/throttle/reason_pl2 - Frequency throttle due to PL2
+ * device/gt#/freq0/throttle/reason_pl4 - Frequency throttle due to PL4, Iccmax etc.
+ * device/gt#/freq0/throttle/reason_thermal - Frequency throttle due to thermal
+ * device/gt#/freq0/throttle/reason_prochot - Frequency throttle due to prochot
+ * device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL
+ * device/gt#/freq0/throttle/reason_vr_thermalert - Frequency throttle due to VR THERMALERT
+ * device/gt#/freq0/throttle/reason_vr_tdc -  Frequency throttle due to VR TDC
+ */
+
+static struct xe_gt *
+dev_to_gt(struct device *dev)
+{
+	return kobj_to_gt(dev->kobj.parent);
+}
+
+static u32 read_perf_limit_reasons(struct xe_gt *gt)
+{
+	u32 reg;
+
+	if (xe_gt_is_media_type(gt))
+		reg = xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS);
+	else
+		reg = xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS);
+
+	return reg;
+}
+
+static u32 read_status(struct xe_gt *gt)
+{
+	u32 status = read_perf_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK;
+
+	return status;
+}
+
+static u32 read_reason_pl1(struct xe_gt *gt)
+{
+	u32 pl1 = read_perf_limit_reasons(gt) & POWER_LIMIT_1_MASK;
+
+	return pl1;
+}
+
+static u32 read_reason_pl2(struct xe_gt *gt)
+{
+	u32 pl2 = read_perf_limit_reasons(gt) & POWER_LIMIT_2_MASK;
+
+	return pl2;
+}
+
+static u32 read_reason_pl4(struct xe_gt *gt)
+{
+	u32 pl4 = read_perf_limit_reasons(gt) & POWER_LIMIT_4_MASK;
+
+	return pl4;
+}
+
+static u32 read_reason_thermal(struct xe_gt *gt)
+{
+	u32 thermal = read_perf_limit_reasons(gt) & THERMAL_LIMIT_MASK;
+
+	return thermal;
+}
+
+static u32 read_reason_prochot(struct xe_gt *gt)
+{
+	u32 prochot = read_perf_limit_reasons(gt) & PROCHOT_MASK;
+
+	return prochot;
+}
+
+static u32 read_reason_ratl(struct xe_gt *gt)
+{
+	u32 ratl = read_perf_limit_reasons(gt) & RATL_MASK;
+
+	return ratl;
+}
+
+static u32 read_reason_vr_thermalert(struct xe_gt *gt)
+{
+	u32 thermalert = read_perf_limit_reasons(gt) & VR_THERMALERT_MASK;
+
+	return thermalert;
+}
+
+static u32 read_reason_vr_tdc(struct xe_gt *gt)
+{
+	u32 tdc = read_perf_limit_reasons(gt) & VR_TDC_MASK;
+
+	return tdc;
+}
+
+static ssize_t status_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool status = !!read_status(gt);
+
+	return sysfs_emit(buff, "%u\n", status);
+}
+static DEVICE_ATTR_RO(status);
+
+static ssize_t reason_pl1_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool pl1 = !!read_reason_pl1(gt);
+
+	return sysfs_emit(buff, "%u\n", pl1);
+}
+static DEVICE_ATTR_RO(reason_pl1);
+
+static ssize_t reason_pl2_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool pl2 = !!read_reason_pl2(gt);
+
+	return sysfs_emit(buff, "%u\n", pl2);
+}
+static DEVICE_ATTR_RO(reason_pl2);
+
+static ssize_t reason_pl4_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool pl4 = !!read_reason_pl4(gt);
+
+	return sysfs_emit(buff, "%u\n", pl4);
+}
+static DEVICE_ATTR_RO(reason_pl4);
+
+static ssize_t reason_thermal_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool thermal = !!read_reason_thermal(gt);
+
+	return sysfs_emit(buff, "%u\n", thermal);
+}
+static DEVICE_ATTR_RO(reason_thermal);
+
+static ssize_t reason_prochot_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool prochot = !!read_reason_prochot(gt);
+
+	return sysfs_emit(buff, "%u\n", prochot);
+}
+static DEVICE_ATTR_RO(reason_prochot);
+
+static ssize_t reason_ratl_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool ratl = !!read_reason_ratl(gt);
+
+	return sysfs_emit(buff, "%u\n", ratl);
+}
+static DEVICE_ATTR_RO(reason_ratl);
+
+static ssize_t reason_vr_thermalert_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool thermalert = !!read_reason_vr_thermalert(gt);
+
+	return sysfs_emit(buff, "%u\n", thermalert);
+}
+static DEVICE_ATTR_RO(reason_vr_thermalert);
+
+static ssize_t reason_vr_tdc_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buff)
+{
+	struct xe_gt *gt = dev_to_gt(dev);
+	bool tdc = !!read_reason_vr_tdc(gt);
+
+	return sysfs_emit(buff, "%u\n", tdc);
+}
+static DEVICE_ATTR_RO(reason_vr_tdc);
+
+static struct attribute *throttle_attrs[] = {
+	&dev_attr_status.attr,
+	&dev_attr_reason_pl1.attr,
+	&dev_attr_reason_pl2.attr,
+	&dev_attr_reason_pl4.attr,
+	&dev_attr_reason_thermal.attr,
+	&dev_attr_reason_prochot.attr,
+	&dev_attr_reason_ratl.attr,
+	&dev_attr_reason_vr_thermalert.attr,
+	&dev_attr_reason_vr_tdc.attr,
+	NULL
+};
+
+static const struct attribute_group throttle_group_attrs = {
+	.name = "throttle",
+	.attrs = throttle_attrs,
+};
+
+static void gt_throttle_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	sysfs_remove_group(gt->freq, &throttle_group_attrs);
+}
+
+void xe_gt_throttle_sysfs_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	err = sysfs_create_group(gt->freq, &throttle_group_attrs);
+	if (err) {
+		drm_warn(&xe->drm, "failed to register throttle sysfs, err: %d\n", err);
+		return;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt);
+	if (err)
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
new file mode 100644
index 000000000000..3ecfd4beffe1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_THROTTLE_SYSFS_H_
+#define _XE_GT_THROTTLE_SYSFS_H_
+
+#include <drm/drm_managed.h>
+
+struct xe_gt;
+
+void xe_gt_throttle_sysfs_init(struct xe_gt *gt);
+
+#endif /* _XE_GT_THROTTLE_SYSFS_H_ */
+
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
new file mode 100644
index 000000000000..7eef23a00d77
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gt_tlb_invalidation.h"
+
+#include "abi/guc_actions_abi.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_trace.h"
+
+#define TLB_TIMEOUT	(HZ / 4)
+
+static void xe_gt_tlb_fence_timeout(struct work_struct *work)
+{
+	struct xe_gt *gt = container_of(work, struct xe_gt,
+					tlb_invalidation.fence_tdr.work);
+	struct xe_gt_tlb_invalidation_fence *fence, *next;
+
+	spin_lock_irq(&gt->tlb_invalidation.pending_lock);
+	list_for_each_entry_safe(fence, next,
+				 &gt->tlb_invalidation.pending_fences, link) {
+		s64 since_inval_ms = ktime_ms_delta(ktime_get(),
+						    fence->invalidation_time);
+
+		if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT)
+			break;
+
+		trace_xe_gt_tlb_invalidation_fence_timeout(fence);
+		drm_err(&gt_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d recv=%d",
+			gt->info.id, fence->seqno, gt->tlb_invalidation.seqno_recv);
+
+		list_del(&fence->link);
+		fence->base.error = -ETIME;
+		dma_fence_signal(&fence->base);
+		dma_fence_put(&fence->base);
+	}
+	if (!list_empty(&gt->tlb_invalidation.pending_fences))
+		queue_delayed_work(system_wq,
+				   &gt->tlb_invalidation.fence_tdr,
+				   TLB_TIMEOUT);
+	spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
+}
+
+/**
+ * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state
+ * @gt: graphics tile
+ *
+ * Initialize GT TLB invalidation state, purely software initialization, should
+ * be called once during driver load.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
+{
+	gt->tlb_invalidation.seqno = 1;
+	INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
+	spin_lock_init(&gt->tlb_invalidation.pending_lock);
+	spin_lock_init(&gt->tlb_invalidation.lock);
+	gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
+	INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
+			  xe_gt_tlb_fence_timeout);
+
+	return 0;
+}
+
+static void
+__invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
+{
+	trace_xe_gt_tlb_invalidation_fence_signal(fence);
+	dma_fence_signal(&fence->base);
+	dma_fence_put(&fence->base);
+}
+
+static void
+invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
+{
+	list_del(&fence->link);
+	__invalidation_fence_signal(fence);
+}
+
+/**
+ * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
+ * @gt: graphics tile
+ *
+ * Signal any pending invalidation fences, should be called during a GT reset
+ */
+void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
+{
+	struct xe_gt_tlb_invalidation_fence *fence, *next;
+	struct xe_guc *guc = &gt->uc.guc;
+	int pending_seqno;
+
+	/*
+	 * CT channel is already disabled at this point. No new TLB requests can
+	 * appear.
+	 */
+
+	mutex_lock(&gt->uc.guc.ct.lock);
+	spin_lock_irq(&gt->tlb_invalidation.pending_lock);
+	cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
+	/*
+	 * We might have various kworkers waiting for TLB flushes to complete
+	 * which are not tracked with an explicit TLB fence, however at this
+	 * stage that will never happen since the CT is already disabled, so
+	 * make sure we signal them here under the assumption that we have
+	 * completed a full GT reset.
+	 */
+	if (gt->tlb_invalidation.seqno == 1)
+		pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
+	else
+		pending_seqno = gt->tlb_invalidation.seqno - 1;
+	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
+	wake_up_all(&guc->ct.wq);
+
+	list_for_each_entry_safe(fence, next,
+				 &gt->tlb_invalidation.pending_fences, link)
+		invalidation_fence_signal(fence);
+	spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
+	mutex_unlock(&gt->uc.guc.ct.lock);
+}
+
+static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
+{
+	int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv);
+
+	if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
+		return false;
+
+	if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
+		return true;
+
+	return seqno_recv >= seqno;
+}
+
+static int send_tlb_invalidation(struct xe_guc *guc,
+				 struct xe_gt_tlb_invalidation_fence *fence,
+				 u32 *action, int len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	int seqno;
+	int ret;
+
+	/*
+	 * XXX: The seqno algorithm relies on TLB invalidation being processed
+	 * in order which they currently are, if that changes the algorithm will
+	 * need to be updated.
+	 */
+
+	mutex_lock(&guc->ct.lock);
+	seqno = gt->tlb_invalidation.seqno;
+	if (fence) {
+		fence->seqno = seqno;
+		trace_xe_gt_tlb_invalidation_fence_send(fence);
+	}
+	action[1] = seqno;
+	ret = xe_guc_ct_send_locked(&guc->ct, action, len,
+				    G2H_LEN_DW_TLB_INVALIDATE, 1);
+	if (!ret && fence) {
+		spin_lock_irq(&gt->tlb_invalidation.pending_lock);
+		/*
+		 * We haven't actually published the TLB fence as per
+		 * pending_fences, but in theory our seqno could have already
+		 * been written as we acquired the pending_lock. In such a case
+		 * we can just go ahead and signal the fence here.
+		 */
+		if (tlb_invalidation_seqno_past(gt, seqno)) {
+			__invalidation_fence_signal(fence);
+		} else {
+			fence->invalidation_time = ktime_get();
+			list_add_tail(&fence->link,
+				      &gt->tlb_invalidation.pending_fences);
+
+			if (list_is_singular(&gt->tlb_invalidation.pending_fences))
+				queue_delayed_work(system_wq,
+						   &gt->tlb_invalidation.fence_tdr,
+						   TLB_TIMEOUT);
+		}
+		spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
+	} else if (ret < 0 && fence) {
+		__invalidation_fence_signal(fence);
+	}
+	if (!ret) {
+		gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
+			TLB_INVALIDATION_SEQNO_MAX;
+		if (!gt->tlb_invalidation.seqno)
+			gt->tlb_invalidation.seqno = 1;
+		ret = seqno;
+	}
+	mutex_unlock(&guc->ct.lock);
+
+	return ret;
+}
+
+#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
+		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
+		XE_GUC_TLB_INVAL_FLUSH_CACHE)
+
+/**
+ * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
+ * @gt: graphics tile
+ *
+ * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
+ * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion.
+ *
+ * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
+ * negative error code on error.
+ */
+int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_TLB_INVALIDATION,
+		0,  /* seqno, replaced in send_tlb_invalidation */
+		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
+	};
+
+	return send_tlb_invalidation(&gt->uc.guc, NULL, action,
+				     ARRAY_SIZE(action));
+}
+
+/**
+ * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
+ * @gt: graphics tile
+ * @fence: invalidation fence which will be signal on TLB invalidation
+ * completion, can be NULL
+ * @vma: VMA to invalidate
+ *
+ * Issue a range based TLB invalidation if supported, if not fallback to a full
+ * TLB invalidation. Completion of TLB is asynchronous and caller can either use
+ * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
+ * completion.
+ *
+ * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
+ * negative error code on error.
+ */
+int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
+			       struct xe_gt_tlb_invalidation_fence *fence,
+			       struct xe_vma *vma)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+#define MAX_TLB_INVALIDATION_LEN	7
+	u32 action[MAX_TLB_INVALIDATION_LEN];
+	int len = 0;
+
+	xe_gt_assert(gt, vma);
+
+	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
+	action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
+	if (!xe->info.has_range_tlb_invalidation) {
+		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
+	} else {
+		u64 start = xe_vma_start(vma);
+		u64 length = xe_vma_size(vma);
+		u64 align, end;
+
+		if (length < SZ_4K)
+			length = SZ_4K;
+
+		/*
+		 * We need to invalidate a higher granularity if start address
+		 * is not aligned to length. When start is not aligned with
+		 * length we need to find the length large enough to create an
+		 * address mask covering the required range.
+		 */
+		align = roundup_pow_of_two(length);
+		start = ALIGN_DOWN(xe_vma_start(vma), align);
+		end = ALIGN(xe_vma_end(vma), align);
+		length = align;
+		while (start + length < end) {
+			length <<= 1;
+			start = ALIGN_DOWN(xe_vma_start(vma), length);
+		}
+
+		/*
+		 * Minimum invalidation size for a 2MB page that the hardware
+		 * expects is 16MB
+		 */
+		if (length >= SZ_2M) {
+			length = max_t(u64, SZ_16M, length);
+			start = ALIGN_DOWN(xe_vma_start(vma), length);
+		}
+
+		xe_gt_assert(gt, length >= SZ_4K);
+		xe_gt_assert(gt, is_power_of_2(length));
+		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
+		xe_gt_assert(gt, IS_ALIGNED(start, length));
+
+		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
+		action[len++] = xe_vma_vm(vma)->usm.asid;
+		action[len++] = lower_32_bits(start);
+		action[len++] = upper_32_bits(start);
+		action[len++] = ilog2(length) - ilog2(SZ_4K);
+	}
+
+	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
+
+	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
+}
+
+/**
+ * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
+ * @gt: graphics tile
+ * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation
+ *
+ * Wait for 200ms for a TLB invalidation to complete, in practice we always
+ * should receive the TLB invalidation within 200ms.
+ *
+ * Return: 0 on success, -ETIME on TLB invalidation timeout
+ */
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_guc *guc = &gt->uc.guc;
+	struct drm_printer p = drm_err_printer(__func__);
+	int ret;
+
+	/*
+	 * XXX: See above, this algorithm only works if seqno are always in
+	 * order
+	 */
+	ret = wait_event_timeout(guc->ct.wq,
+				 tlb_invalidation_seqno_past(gt, seqno),
+				 TLB_TIMEOUT);
+	if (!ret) {
+		drm_err(&xe->drm, "gt%d: TLB invalidation time'd out, seqno=%d, recv=%d\n",
+			gt->info.id, seqno, gt->tlb_invalidation.seqno_recv);
+		xe_guc_ct_print(&guc->ct, &p, true);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
+/**
+ * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
+ * @guc: guc
+ * @msg: message indicating TLB invalidation done
+ * @len: length of message
+ *
+ * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any
+ * invalidation fences for seqno. Algorithm for this depends on seqno being
+ * received in-order and asserts this assumption.
+ *
+ * Return: 0 on success, -EPROTO for malformed messages.
+ */
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_gt_tlb_invalidation_fence *fence, *next;
+	unsigned long flags;
+
+	if (unlikely(len != 1))
+		return -EPROTO;
+
+	/*
+	 * This can also be run both directly from the IRQ handler and also in
+	 * process_g2h_msg(). Only one may process any individual CT message,
+	 * however the order they are processed here could result in skipping a
+	 * seqno. To handle that we just process all the seqnos from the last
+	 * seqno_recv up to and including the one in msg[0]. The delta should be
+	 * very small so there shouldn't be much of pending_fences we actually
+	 * need to iterate over here.
+	 *
+	 * From GuC POV we expect the seqnos to always appear in-order, so if we
+	 * see something later in the timeline we can be sure that anything
+	 * appearing earlier has already signalled, just that we have yet to
+	 * officially process the CT message like if racing against
+	 * process_g2h_msg().
+	 */
+	spin_lock_irqsave(&gt->tlb_invalidation.pending_lock, flags);
+	if (tlb_invalidation_seqno_past(gt, msg[0])) {
+		spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
+		return 0;
+	}
+
+	/*
+	 * wake_up_all() and wait_event_timeout() already have the correct
+	 * barriers.
+	 */
+	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
+	wake_up_all(&guc->ct.wq);
+
+	list_for_each_entry_safe(fence, next,
+				 &gt->tlb_invalidation.pending_fences, link) {
+		trace_xe_gt_tlb_invalidation_fence_recv(fence);
+
+		if (!tlb_invalidation_seqno_past(gt, fence->seqno))
+			break;
+
+		invalidation_fence_signal(fence);
+	}
+
+	if (!list_empty(&gt->tlb_invalidation.pending_fences))
+		mod_delayed_work(system_wq,
+				 &gt->tlb_invalidation.fence_tdr,
+				 TLB_TIMEOUT);
+	else
+		cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
+
+	spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
new file mode 100644
index 000000000000..b333c1709397
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_TLB_INVALIDATION_H_
+#define _XE_GT_TLB_INVALIDATION_H_
+
+#include <linux/types.h>
+
+#include "xe_gt_tlb_invalidation_types.h"
+
+struct xe_gt;
+struct xe_guc;
+struct xe_vma;
+
+int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
+void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_guc(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
+			       struct xe_gt_tlb_invalidation_fence *fence,
+			       struct xe_vma *vma);
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif	/* _XE_GT_TLB_INVALIDATION_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
new file mode 100644
index 000000000000..934c828efe31
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_TLB_INVALIDATION_TYPES_H_
+#define _XE_GT_TLB_INVALIDATION_TYPES_H_
+
+#include <linux/dma-fence.h>
+
+/**
+ * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence
+ *
+ * Optionally passed to xe_gt_tlb_invalidation and will be signaled upon TLB
+ * invalidation completion.
+ */
+struct xe_gt_tlb_invalidation_fence {
+	/** @base: dma fence base */
+	struct dma_fence base;
+	/** @link: link into list of pending tlb fences */
+	struct list_head link;
+	/** @seqno: seqno of TLB invalidation to signal fence one */
+	int seqno;
+	/** @invalidation_time: time of TLB invalidation */
+	ktime_t invalidation_time;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
new file mode 100644
index 000000000000..a8d7f272c30a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt_topology.h"
+
+#include <linux/bitmap.h>
+
+#include "regs/xe_gt_regs.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+
+#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
+#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
+
+static void
+load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
+{
+	va_list argp;
+	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
+	int i;
+
+	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
+		numregs = XE_MAX_DSS_FUSE_REGS;
+
+	va_start(argp, numregs);
+	for (i = 0; i < numregs; i++)
+		fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg));
+	va_end(argp);
+
+	bitmap_from_arr32(mask, fuse_val, numregs * 32);
+}
+
+static void
+load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
+	u32 val = 0;
+	int i;
+
+	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
+
+	/*
+	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
+	 * of enable).
+	 */
+	if (GRAPHICS_VERx100(xe) < 1250)
+		reg_val = ~reg_val & XELP_EU_MASK;
+
+	/* On PVC, one bit = one EU */
+	if (GRAPHICS_VERx100(xe) == 1260) {
+		val = reg_val;
+	} else {
+		/* All other platforms, one bit = 2 EU */
+		for (i = 0; i < fls(reg_val); i++)
+			if (reg_val & BIT(i))
+				val |= 0x3 << 2 * i;
+	}
+
+	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
+}
+
+static void
+get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
+{
+	if (GRAPHICS_VER(xe) > 20) {
+		*geometry_regs = 3;
+		*compute_regs = 3;
+	} else if (GRAPHICS_VERx100(xe) == 1260) {
+		*geometry_regs = 0;
+		*compute_regs = 2;
+	} else if (GRAPHICS_VERx100(xe) >= 1250) {
+		*geometry_regs = 1;
+		*compute_regs = 1;
+	} else {
+		*geometry_regs = 1;
+		*compute_regs = 0;
+	}
+}
+
+void
+xe_gt_topology_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct drm_printer p = drm_debug_printer("GT topology");
+	int num_geometry_regs, num_compute_regs;
+
+	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);
+
+	/*
+	 * Register counts returned shouldn't exceed the number of registers
+	 * passed as parameters below.
+	 */
+	drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
+	drm_WARN_ON(&xe->drm, num_compute_regs > 3);
+
+	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
+		      num_geometry_regs,
+		      XELP_GT_GEOMETRY_DSS_ENABLE,
+		      XE2_GT_GEOMETRY_DSS_1,
+		      XE2_GT_GEOMETRY_DSS_2);
+	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
+		      XEHP_GT_COMPUTE_DSS_ENABLE,
+		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
+		      XE2_GT_COMPUTE_DSS_2);
+	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
+
+	xe_gt_topology_dump(gt, &p);
+}
+
+void
+xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
+		   gt->fuse_topo.g_dss_mask);
+	drm_printf(p, "dss mask (compute):  %*pb\n", XE_MAX_DSS_FUSE_BITS,
+		   gt->fuse_topo.c_dss_mask);
+
+	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
+		   gt->fuse_topo.eu_mask_per_dss);
+
+}
+
+/*
+ * Used to obtain the index of the first DSS.  Can start searching from the
+ * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
+ * groupsize and groupnum are non-zero.
+ */
+unsigned int
+xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
+{
+	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
+}
+
+bool xe_dss_mask_empty(const xe_dss_mask_t mask)
+{
+	return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
+}
+
+/**
+ * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
+ * @gt: GT to check
+ * @quad: Which quadrant of the DSS space to check
+ *
+ * Since Xe_HP platforms can have up to four CCS engines, those engines
+ * are each logically associated with a quarter of the possible DSS.  If there
+ * are no DSS present in one of the four quadrants of the DSS space, the
+ * corresponding CCS engine is also not available for use.
+ *
+ * Returns false if all DSS in a quadrant of the GT are fused off, else true.
+ */
+bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	xe_dss_mask_t all_dss;
+	int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;
+
+	bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
+		  XE_MAX_DSS_FUSE_BITS);
+
+	get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
+	dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;
+
+	quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);
+
+	return quad_first < (quad + 1) * dss_per_quad;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
new file mode 100644
index 000000000000..d1b54fb52ea6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_TOPOLOGY_H_
+#define _XE_GT_TOPOLOGY_H_
+
+#include "xe_gt_types.h"
+
+struct drm_printer;
+
+void xe_gt_topology_init(struct xe_gt *gt);
+
+void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
+
+unsigned int
+xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum);
+
+bool xe_dss_mask_empty(const xe_dss_mask_t mask);
+
+bool
+xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
+
+#endif /* _XE_GT_TOPOLOGY_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
new file mode 100644
index 000000000000..f74684660475
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022-2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_TYPES_H_
+#define _XE_GT_TYPES_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_gt_idle_types.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence_types.h"
+#include "xe_reg_sr_types.h"
+#include "xe_sa_types.h"
+#include "xe_uc_types.h"
+
+struct xe_exec_queue_ops;
+struct xe_migrate;
+struct xe_ring_ops;
+
+enum xe_gt_type {
+	XE_GT_TYPE_UNINITIALIZED,
+	XE_GT_TYPE_MAIN,
+	XE_GT_TYPE_MEDIA,
+};
+
+#define XE_MAX_DSS_FUSE_REGS	3
+#define XE_MAX_EU_FUSE_REGS	1
+
+typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
+typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)];
+
+struct xe_mmio_range {
+	u32 start;
+	u32 end;
+};
+
+/*
+ * The hardware has multiple kinds of multicast register ranges that need
+ * special register steering (and future platforms are expected to add
+ * additional types).
+ *
+ * During driver startup, we initialize the steering control register to
+ * direct reads to a slice/subslice that are valid for the 'subslice' class
+ * of multicast registers.  If another type of steering does not have any
+ * overlap in valid steering targets with 'subslice' style registers, we will
+ * need to explicitly re-steer reads of registers of the other type.
+ *
+ * Only the replication types that may need additional non-default steering
+ * are listed here.
+ */
+enum xe_steering_type {
+	L3BANK,
+	MSLICE,
+	LNCF,
+	DSS,
+	OADDRM,
+	SQIDI_PSMI,
+
+	/*
+	 * On some platforms there are multiple types of MCR registers that
+	 * will always return a non-terminated value at instance (0, 0).  We'll
+	 * lump those all into a single category to keep things simple.
+	 */
+	INSTANCE0,
+
+	/*
+	 * Register ranges that don't need special steering for each register:
+	 * it's sufficient to keep the HW-default for the selector, or only
+	 * change it once, on GT initialization. This needs to be the last
+	 * steering type.
+	 */
+	IMPLICIT_STEERING,
+	NUM_STEERING_TYPES
+};
+
+#define gt_to_tile(gt__)							\
+	_Generic(gt__,								\
+		 const struct xe_gt * : (const struct xe_tile *)((gt__)->tile),	\
+		 struct xe_gt * : (gt__)->tile)
+
+#define gt_to_xe(gt__)										\
+	_Generic(gt__,										\
+		 const struct xe_gt * : (const struct xe_device *)(gt_to_tile(gt__)->xe),	\
+		 struct xe_gt * : gt_to_tile(gt__)->xe)
+
+/**
+ * struct xe_gt - A "Graphics Technology" unit of the GPU
+ *
+ * A GT ("Graphics Technology") is the subset of a GPU primarily responsible
+ * for implementing the graphics, compute, and/or media IP.  It encapsulates
+ * the hardware engines, programmable execution units, and GuC.   Each GT has
+ * its own handling of power management (RC6+forcewake) and multicast register
+ * steering.
+ *
+ * A GPU/tile may have a single GT that supplies all graphics, compute, and
+ * media functionality, or the graphics/compute and media may be split into
+ * separate GTs within a tile.
+ */
+struct xe_gt {
+	/** @tile: Backpointer to GT's tile */
+	struct xe_tile *tile;
+
+	/** @info: GT info */
+	struct {
+		/** @type: type of GT */
+		enum xe_gt_type type;
+		/** @id: Unique ID of this GT within the PCI Device */
+		u8 id;
+		/** @reference_clock: clock frequency */
+		u32 reference_clock;
+		/** @engine_mask: mask of engines present on GT */
+		u64 engine_mask;
+		/**
+		 * @__engine_mask: mask of engines present on GT read from
+		 * xe_pci.c, used to fake reading the engine_mask from the
+		 * hwconfig blob.
+		 */
+		u64 __engine_mask;
+	} info;
+
+	/**
+	 * @mmio: mmio info for GT.  All GTs within a tile share the same
+	 * register space, but have their own copy of GSI registers at a
+	 * specific offset, as well as their own forcewake handling.
+	 */
+	struct {
+		/** @fw: force wake for GT */
+		struct xe_force_wake fw;
+		/**
+		 * @adj_limit: adjust MMIO address if address is below this
+		 * value
+		 */
+		u32 adj_limit;
+		/** @adj_offset: offect to add to MMIO address when adjusting */
+		u32 adj_offset;
+	} mmio;
+
+	/**
+	 * @reg_sr: table with registers to be restored on GT init/resume/reset
+	 */
+	struct xe_reg_sr reg_sr;
+
+	/** @reset: state for GT resets */
+	struct {
+		/**
+		 * @worker: work so GT resets can done async allowing to reset
+		 * code to safely flush all code paths
+		 */
+		struct work_struct worker;
+	} reset;
+
+	/** @tlb_invalidation: TLB invalidation state */
+	struct {
+		/** @seqno: TLB invalidation seqno, protected by CT lock */
+#define TLB_INVALIDATION_SEQNO_MAX	0x100000
+		int seqno;
+		/**
+		 * @seqno_recv: last received TLB invalidation seqno, protected by CT lock
+		 */
+		int seqno_recv;
+		/**
+		 * @pending_fences: list of pending fences waiting TLB
+		 * invaliations, protected by CT lock
+		 */
+		struct list_head pending_fences;
+		/**
+		 * @pending_lock: protects @pending_fences and updating
+		 * @seqno_recv.
+		 */
+		spinlock_t pending_lock;
+		/**
+		 * @fence_tdr: schedules a delayed call to
+		 * xe_gt_tlb_fence_timeout after the timeut interval is over.
+		 */
+		struct delayed_work fence_tdr;
+		/** @fence_context: context for TLB invalidation fences */
+		u64 fence_context;
+		/**
+		 * @fence_seqno: seqno to TLB invalidation fences, protected by
+		 * tlb_invalidation.lock
+		 */
+		u32 fence_seqno;
+		/** @lock: protects TLB invalidation fences */
+		spinlock_t lock;
+	} tlb_invalidation;
+
+	/**
+	 * @ccs_mode: Number of compute engines enabled.
+	 * Allows fixed mapping of available compute slices to compute engines.
+	 * By default only the first available compute engine is enabled and all
+	 * available compute slices are allocated to it.
+	 */
+	u32 ccs_mode;
+
+	/** @usm: unified shared memory state */
+	struct {
+		/**
+		 * @bb_pool: Pool from which batchbuffers, for USM operations
+		 * (e.g. migrations, fixing page tables), are allocated.
+		 * Dedicated pool needed so USM operations to not get blocked
+		 * behind any user operations which may have resulted in a
+		 * fault.
+		 */
+		struct xe_sa_manager *bb_pool;
+		/**
+		 * @reserved_bcs_instance: reserved BCS instance used for USM
+		 * operations (e.g. mmigrations, fixing page tables)
+		 */
+		u16 reserved_bcs_instance;
+		/** @pf_wq: page fault work queue, unbound, high priority */
+		struct workqueue_struct *pf_wq;
+		/** @acc_wq: access counter work queue, unbound, high priority */
+		struct workqueue_struct *acc_wq;
+		/**
+		 * @pf_queue: Page fault queue used to sync faults so faults can
+		 * be processed not under the GuC CT lock. The queue is sized so
+		 * it can sync all possible faults (1 per physical engine).
+		 * Multiple queues exists for page faults from different VMs are
+		 * be processed in parallel.
+		 */
+		struct pf_queue {
+			/** @gt: back pointer to GT */
+			struct xe_gt *gt;
+#define PF_QUEUE_NUM_DW	128
+			/** @data: data in the page fault queue */
+			u32 data[PF_QUEUE_NUM_DW];
+			/**
+			 * @head: head pointer in DWs for page fault queue,
+			 * moved by worker which processes faults.
+			 */
+			u16 head;
+			/**
+			 * @tail: tail pointer in DWs for page fault queue,
+			 * moved by G2H handler.
+			 */
+			u16 tail;
+			/** @lock: protects page fault queue */
+			spinlock_t lock;
+			/** @worker: to process page faults */
+			struct work_struct worker;
+#define NUM_PF_QUEUE	4
+		} pf_queue[NUM_PF_QUEUE];
+		/**
+		 * @acc_queue: Same as page fault queue, cannot process access
+		 * counters under CT lock.
+		 */
+		struct acc_queue {
+			/** @gt: back pointer to GT */
+			struct xe_gt *gt;
+#define ACC_QUEUE_NUM_DW	128
+			/** @data: data in the page fault queue */
+			u32 data[ACC_QUEUE_NUM_DW];
+			/**
+			 * @head: head pointer in DWs for page fault queue,
+			 * moved by worker which processes faults.
+			 */
+			u16 head;
+			/**
+			 * @tail: tail pointer in DWs for page fault queue,
+			 * moved by G2H handler.
+			 */
+			u16 tail;
+			/** @lock: protects page fault queue */
+			spinlock_t lock;
+			/** @worker: to process access counters */
+			struct work_struct worker;
+#define NUM_ACC_QUEUE	4
+		} acc_queue[NUM_ACC_QUEUE];
+	} usm;
+
+	/** @ordered_wq: used to serialize GT resets and TDRs */
+	struct workqueue_struct *ordered_wq;
+
+	/** @uc: micro controllers on the GT */
+	struct xe_uc uc;
+
+	/** @gtidle: idle properties of GT */
+	struct xe_gt_idle gtidle;
+
+	/** @exec_queue_ops: submission backend exec queue operations */
+	const struct xe_exec_queue_ops *exec_queue_ops;
+
+	/**
+	 * @ring_ops: ring operations for this hw engine (1 per engine class)
+	 */
+	const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX];
+
+	/** @fence_irq: fence IRQs (1 per engine class) */
+	struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX];
+
+	/** @default_lrc: default LRC state */
+	void *default_lrc[XE_ENGINE_CLASS_MAX];
+
+	/** @hw_engines: hardware engines on the GT */
+	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];
+
+	/** @eclass: per hardware engine class interface on the GT */
+	struct xe_hw_engine_class_intf  eclass[XE_ENGINE_CLASS_MAX];
+
+	/** @pcode: GT's PCODE */
+	struct {
+		/** @lock: protecting GT's PCODE mailbox data */
+		struct mutex lock;
+	} pcode;
+
+	/** @sysfs: sysfs' kobj used by xe_gt_sysfs */
+	struct kobject *sysfs;
+
+	/** @freq: Main GT freq sysfs control */
+	struct kobject *freq;
+
+	/** @mocs: info */
+	struct {
+		/** @uc_index: UC index */
+		u8 uc_index;
+		/** @wb_index: WB index, only used on L3_CCS platforms */
+		u8 wb_index;
+	} mocs;
+
+	/** @fuse_topo: GT topology reported by fuse registers */
+	struct {
+		/** @g_dss_mask: dual-subslices usable by geometry */
+		xe_dss_mask_t g_dss_mask;
+
+		/** @c_dss_mask: dual-subslices usable by compute */
+		xe_dss_mask_t c_dss_mask;
+
+		/** @eu_mask_per_dss: EU mask per DSS*/
+		xe_eu_mask_t eu_mask_per_dss;
+	} fuse_topo;
+
+	/** @steering: register steering for individual HW units */
+	struct {
+		/* @ranges: register ranges used for this steering type */
+		const struct xe_mmio_range *ranges;
+
+		/** @group_target: target to steer accesses to */
+		u16 group_target;
+		/** @instance_target: instance to steer accesses to */
+		u16 instance_target;
+	} steering[NUM_STEERING_TYPES];
+
+	/**
+	 * @mcr_lock: protects the MCR_SELECTOR register for the duration
+	 *    of a steered operation
+	 */
+	spinlock_t mcr_lock;
+
+	/** @wa_active: keep track of active workarounds */
+	struct {
+		/** @gt: bitmap with active GT workarounds */
+		unsigned long *gt;
+		/** @engine: bitmap with active engine workarounds */
+		unsigned long *engine;
+		/** @lrc: bitmap with active LRC workarounds */
+		unsigned long *lrc;
+		/** @oob: bitmap with active OOB workaroudns */
+		unsigned long *oob;
+	} wa_active;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
new file mode 100644
index 000000000000..482cb0df9f15
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -0,0 +1,911 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc.h"
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_errors_abi.h"
+#include "generated/xe_wa_oob.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_guc_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc_ads.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_hwconfig.h"
+#include "xe_guc_log.h"
+#include "xe_guc_pc.h"
+#include "xe_guc_submit.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "xe_uc.h"
+#include "xe_uc_fw.h"
+#include "xe_wa.h"
+#include "xe_wopcm.h"
+
+/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
+#define GUC_GGTT_TOP    0xFEE00000
+static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
+			    struct xe_bo *bo)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 addr = xe_bo_ggtt_addr(bo);
+
+	xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc)));
+	xe_assert(xe, addr < GUC_GGTT_TOP);
+	xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr);
+
+	return addr;
+}
+
+static u32 guc_ctl_debug_flags(struct xe_guc *guc)
+{
+	u32 level = xe_guc_log_get_level(&guc->log);
+	u32 flags = 0;
+
+	if (!GUC_LOG_LEVEL_IS_VERBOSE(level))
+		flags |= GUC_LOG_DISABLED;
+	else
+		flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) <<
+			 GUC_LOG_VERBOSITY_SHIFT;
+
+	return flags;
+}
+
+static u32 guc_ctl_feature_flags(struct xe_guc *guc)
+{
+	return GUC_CTL_ENABLE_SLPC;
+}
+
+static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
+{
+	u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT;
+	u32 flags;
+
+	#if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0)
+	#define LOG_UNIT SZ_1M
+	#define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS
+	#else
+	#define LOG_UNIT SZ_4K
+	#define LOG_FLAG 0
+	#endif
+
+	#if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0)
+	#define CAPTURE_UNIT SZ_1M
+	#define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS
+	#else
+	#define CAPTURE_UNIT SZ_4K
+	#define CAPTURE_FLAG 0
+	#endif
+
+	BUILD_BUG_ON(!CRASH_BUFFER_SIZE);
+	BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT));
+	BUILD_BUG_ON(!DEBUG_BUFFER_SIZE);
+	BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT));
+	BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE);
+	BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
+
+	BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) >
+			(GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT));
+	BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) >
+			(GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT));
+	BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) >
+			(GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT));
+
+	flags = GUC_LOG_VALID |
+		GUC_LOG_NOTIFY_ON_HALF_FULL |
+		CAPTURE_FLAG |
+		LOG_FLAG |
+		((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
+		((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
+		((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) <<
+		 GUC_LOG_CAPTURE_SHIFT) |
+		(offset << GUC_LOG_BUF_ADDR_SHIFT);
+
+	#undef LOG_UNIT
+	#undef LOG_FLAG
+	#undef CAPTURE_UNIT
+	#undef CAPTURE_FLAG
+
+	return flags;
+}
+
+static u32 guc_ctl_ads_flags(struct xe_guc *guc)
+{
+	u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT;
+	u32 flags = ads << GUC_ADS_ADDR_SHIFT;
+
+	return flags;
+}
+
+static u32 guc_ctl_wa_flags(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 flags = 0;
+
+	if (XE_WA(gt, 22012773006))
+		flags |= GUC_WA_POLLCS;
+
+	if (XE_WA(gt, 16011759253))
+		flags |= GUC_WA_GAM_CREDITS;
+
+	if (XE_WA(gt, 14014475959))
+		flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
+
+	if (XE_WA(gt, 22011391025) || XE_WA(gt, 14012197797))
+		flags |= GUC_WA_DUAL_QUEUE;
+
+	/*
+	 * Wa_22011802037: FIXME - there's more to be done than simply setting
+	 * this flag: make sure each CS is stopped when preparing for GT reset
+	 * and wait for pending MI_FW.
+	 */
+	if (GRAPHICS_VERx100(xe) < 1270)
+		flags |= GUC_WA_PRE_PARSER;
+
+	if (XE_WA(gt, 16011777198))
+		flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
+
+	if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685))
+		flags |= GUC_WA_CONTEXT_ISOLATION;
+
+	if ((XE_WA(gt, 16015675438) || XE_WA(gt, 18020744125)) &&
+	    !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER))
+		flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
+
+	if (XE_WA(gt, 1509372804))
+		flags |= GUC_WA_RENDER_RST_RC6_EXIT;
+
+	return flags;
+}
+
+static u32 guc_ctl_devid(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+
+	return (((u32)xe->info.devid) << 16) | xe->info.revid;
+}
+
+static void guc_init_params(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 *params = guc->params;
+	int i;
+
+	BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
+	BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT);
+
+	params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
+	params[GUC_CTL_FEATURE] = 0;
+	params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
+	params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc);
+	params[GUC_CTL_WA] = 0;
+	params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
+
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+}
+
+static void guc_init_params_post_hwconfig(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 *params = guc->params;
+	int i;
+
+	BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
+	BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT);
+
+	params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
+	params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
+	params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
+	params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc);
+	params[GUC_CTL_WA] = guc_ctl_wa_flags(guc);
+	params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
+
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+}
+
+/*
+ * Initialize the GuC parameter block before starting the firmware
+ * transfer. These parameters are read by the firmware on startup
+ * and cannot be changed thereafter.
+ */
+static void guc_write_params(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	int i;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	xe_mmio_write32(gt, SOFT_SCRATCH(0), 0);
+
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		xe_mmio_write32(gt, SOFT_SCRATCH(1 + i), guc->params[i]);
+}
+
+static void guc_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc *guc = arg;
+
+	xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+	xe_guc_pc_fini(&guc->pc);
+	xe_uc_fini_hw(&guc_to_gt(guc)->uc);
+	xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+}
+
+int xe_guc_init(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	int ret;
+
+	guc->fw.type = XE_UC_FW_TYPE_GUC;
+	ret = xe_uc_fw_init(&guc->fw);
+	if (ret)
+		goto out;
+
+	if (!xe_uc_fw_is_enabled(&guc->fw))
+		return 0;
+
+	ret = xe_guc_log_init(&guc->log);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_ads_init(&guc->ads);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_ct_init(&guc->ct);
+	if (ret)
+		goto out;
+
+	ret = xe_guc_pc_init(&guc->pc);
+	if (ret)
+		goto out;
+
+	ret = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, guc_fini, guc);
+	if (ret)
+		goto out;
+
+	guc_init_params(guc);
+
+	if (xe_gt_is_media_type(gt))
+		guc->notify_reg = MED_GUC_HOST_INTERRUPT;
+	else
+		guc->notify_reg = GUC_HOST_INTERRUPT;
+
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
+
+	return 0;
+
+out:
+	drm_err(&xe->drm, "GuC init failed with %d", ret);
+	return ret;
+}
+
+/**
+ * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load
+ * @guc: The GuC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_init_post_hwconfig(struct xe_guc *guc)
+{
+	guc_init_params_post_hwconfig(guc);
+
+	return xe_guc_ads_init_post_hwconfig(&guc->ads);
+}
+
+int xe_guc_post_load_init(struct xe_guc *guc)
+{
+	xe_guc_ads_populate_post_load(&guc->ads);
+	guc->submission_state.enabled = true;
+
+	return 0;
+}
+
+int xe_guc_reset(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 guc_status, gdrst;
+	int ret;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	xe_mmio_write32(gt, GDRST, GRDOM_GUC);
+
+	ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
+	if (ret) {
+		drm_err(&xe->drm, "GuC reset timed out, GDRST=0x%8x\n",
+			gdrst);
+		goto err_out;
+	}
+
+	guc_status = xe_mmio_read32(gt, GUC_STATUS);
+	if (!(guc_status & GS_MIA_IN_RESET)) {
+		drm_err(&xe->drm,
+			"GuC status: 0x%x, MIA core expected to be in reset\n",
+			guc_status);
+		ret = -EIO;
+		goto err_out;
+	}
+
+	return 0;
+
+err_out:
+
+	return ret;
+}
+
+static void guc_prepare_xfer(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_device *xe =  guc_to_xe(guc);
+	u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
+		GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
+		GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
+		GUC_ENABLE_MIA_CLOCK_GATING;
+
+	if (GRAPHICS_VERx100(xe) < 1250)
+		shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES |
+				GUC_ENABLE_MIA_CACHING;
+
+	if (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC)
+		shim_flags |= REG_FIELD_PREP(GUC_MOCS_INDEX_MASK, gt->mocs.uc_index);
+
+	/* Must program this register before loading the ucode with DMA */
+	xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags);
+
+	xe_mmio_write32(gt, GT_PM_CONFIG, GT_DOORBELL_ENABLE);
+}
+
+/*
+ * Supporting MMIO & in memory RSA
+ */
+static int guc_xfer_rsa(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 rsa[UOS_RSA_SCRATCH_COUNT];
+	size_t copied;
+	int i;
+
+	if (guc->fw.rsa_size > 256) {
+		u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) +
+				    xe_uc_fw_rsa_offset(&guc->fw);
+		xe_mmio_write32(gt, UOS_RSA_SCRATCH(0), rsa_ggtt_addr);
+		return 0;
+	}
+
+	copied = xe_uc_fw_copy_rsa(&guc->fw, rsa, sizeof(rsa));
+	if (copied < sizeof(rsa))
+		return -ENOMEM;
+
+	for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
+		xe_mmio_write32(gt, UOS_RSA_SCRATCH(i), rsa[i]);
+
+	return 0;
+}
+
+static int guc_wait_ucode(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 status;
+	int ret;
+
+	/*
+	 * Wait for the GuC to start up.
+	 * NB: Docs recommend not using the interrupt for completion.
+	 * Measurements indicate this should take no more than 20ms
+	 * (assuming the GT clock is at maximum frequency). So, a
+	 * timeout here indicates that the GuC has failed and is unusable.
+	 * (Higher levels of the driver may decide to reset the GuC and
+	 * attempt the ucode load again if this happens.)
+	 *
+	 * FIXME: There is a known (but exceedingly unlikely) race condition
+	 * where the asynchronous frequency management code could reduce
+	 * the GT clock while a GuC reload is in progress (during a full
+	 * GT reset). A fix is in progress but there are complex locking
+	 * issues to be resolved. In the meantime bump the timeout to
+	 * 200ms. Even at slowest clock, this should be sufficient. And
+	 * in the working case, a larger timeout makes no difference.
+	 */
+	ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS, GS_UKERNEL_MASK,
+			     FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY),
+			     200000, &status, false);
+
+	if (ret) {
+		struct drm_device *drm = &xe->drm;
+		struct drm_printer p = drm_info_printer(drm->dev);
+
+		drm_info(drm, "GuC load failed: status = 0x%08X\n", status);
+		drm_info(drm, "GuC load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
+			 REG_FIELD_GET(GS_MIA_IN_RESET, status),
+			 REG_FIELD_GET(GS_BOOTROM_MASK, status),
+			 REG_FIELD_GET(GS_UKERNEL_MASK, status),
+			 REG_FIELD_GET(GS_MIA_MASK, status),
+			 REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+
+		if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
+			drm_info(drm, "GuC firmware signature verification failed\n");
+			ret = -ENOEXEC;
+		}
+
+		if (REG_FIELD_GET(GS_UKERNEL_MASK, status) ==
+		    XE_GUC_LOAD_STATUS_EXCEPTION) {
+			drm_info(drm, "GuC firmware exception. EIP: %#x\n",
+				 xe_mmio_read32(guc_to_gt(guc),
+						SOFT_SCRATCH(13)));
+			ret = -ENXIO;
+		}
+
+		xe_guc_log_print(&guc->log, &p);
+	} else {
+		drm_dbg(&xe->drm, "GuC successfully loaded");
+	}
+
+	return ret;
+}
+
+static int __xe_guc_upload(struct xe_guc *guc)
+{
+	int ret;
+
+	guc_write_params(guc);
+	guc_prepare_xfer(guc);
+
+	/*
+	 * Note that GuC needs the CSS header plus uKernel code to be copied
+	 * by the DMA engine in one operation, whereas the RSA signature is
+	 * loaded separately, either by copying it to the UOS_RSA_SCRATCH
+	 * register (if key size <= 256) or through a ggtt-pinned vma (if key
+	 * size > 256). The RSA size and therefore the way we provide it to the
+	 * HW is fixed for each platform and hard-coded in the bootrom.
+	 */
+	ret = guc_xfer_rsa(guc);
+	if (ret)
+		goto out;
+	/*
+	 * Current uCode expects the code to be loaded at 8k; locations below
+	 * this are used for the stack.
+	 */
+	ret = xe_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE);
+	if (ret)
+		goto out;
+
+	/* Wait for authentication */
+	ret = guc_wait_ucode(guc);
+	if (ret)
+		goto out;
+
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING);
+	return 0;
+
+out:
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+	return 0	/* FIXME: ret, don't want to stop load currently */;
+}
+
+/**
+ * xe_guc_min_load_for_hwconfig - load minimal GuC and read hwconfig table
+ * @guc: The GuC object
+ *
+ * This function uploads a minimal GuC that does not support submissions but
+ * in a state where the hwconfig table can be read. Next, it reads and parses
+ * the hwconfig table so it can be used for subsequent steps in the driver load.
+ * Lastly, it enables CT communication (XXX: this is needed for PFs/VFs only).
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_min_load_for_hwconfig(struct xe_guc *guc)
+{
+	int ret;
+
+	xe_guc_ads_populate_minimal(&guc->ads);
+
+	ret = __xe_guc_upload(guc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_hwconfig_init(guc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_enable_communication(guc);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int xe_guc_upload(struct xe_guc *guc)
+{
+	xe_guc_ads_populate(&guc->ads);
+
+	return __xe_guc_upload(guc);
+}
+
+static void guc_handle_mmio_msg(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 msg;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	msg = xe_mmio_read32(gt, SOFT_SCRATCH(15));
+	msg &= XE_GUC_RECV_MSG_EXCEPTION |
+		XE_GUC_RECV_MSG_CRASH_DUMP_POSTED;
+	xe_mmio_write32(gt, SOFT_SCRATCH(15), 0);
+
+	if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED)
+		drm_err(&guc_to_xe(guc)->drm,
+			"Received early GuC crash dump notification!\n");
+
+	if (msg & XE_GUC_RECV_MSG_EXCEPTION)
+		drm_err(&guc_to_xe(guc)->drm,
+			"Received early GuC exception notification!\n");
+}
+
+static void guc_enable_irq(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 events = xe_gt_is_media_type(gt) ?
+		REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST)  :
+		REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
+
+	/* Primary GuC and media GuC share a single enable bit */
+	xe_mmio_write32(gt, GUC_SG_INTR_ENABLE,
+			REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST));
+
+	/*
+	 * There are separate mask bits for primary and media GuCs, so use
+	 * a RMW operation to avoid clobbering the other GuC's setting.
+	 */
+	xe_mmio_rmw32(gt, GUC_SG_INTR_MASK, events, 0);
+}
+
+int xe_guc_enable_communication(struct xe_guc *guc)
+{
+	int err;
+
+	guc_enable_irq(guc);
+
+	xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK,
+		      ARAT_EXPIRED_INTRMSK, 0);
+
+	err = xe_guc_ct_enable(&guc->ct);
+	if (err)
+		return err;
+
+	guc_handle_mmio_msg(guc);
+
+	return 0;
+}
+
+int xe_guc_suspend(struct xe_guc *guc)
+{
+	int ret;
+	u32 action[] = {
+		XE_GUC_ACTION_CLIENT_SOFT_RESET,
+	};
+
+	ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action));
+	if (ret) {
+		drm_err(&guc_to_xe(guc)->drm,
+			"GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret);
+		return ret;
+	}
+
+	xe_guc_sanitize(guc);
+	return 0;
+}
+
+void xe_guc_notify(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	const u32 default_notify_data = 0;
+
+	/*
+	 * Both GUC_HOST_INTERRUPT and MED_GUC_HOST_INTERRUPT can pass
+	 * additional payload data to the GuC but this capability is not
+	 * used by the firmware yet. Use default value in the meantime.
+	 */
+	xe_mmio_write32(gt, guc->notify_reg, default_notify_data);
+}
+
+int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_AUTHENTICATE_HUC,
+		rsa_addr
+	};
+
+	return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
+			  u32 len, u32 *response_buf)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 header, reply;
+	struct xe_reg reply_reg = xe_gt_is_media_type(gt) ?
+		MED_VF_SW_FLAG(0) : VF_SW_FLAG(0);
+	const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1;
+	int ret;
+	int i;
+
+	BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT);
+
+	xe_assert(xe, !guc->ct.enabled);
+	xe_assert(xe, len);
+	xe_assert(xe, len <= VF_SW_FLAG_COUNT);
+	xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT);
+	xe_assert(xe, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) ==
+		  GUC_HXG_ORIGIN_HOST);
+	xe_assert(xe, FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) ==
+		  GUC_HXG_TYPE_REQUEST);
+
+retry:
+	/* Not in critical data-path, just do if else for GT type */
+	if (xe_gt_is_media_type(gt)) {
+		for (i = 0; i < len; ++i)
+			xe_mmio_write32(gt, MED_VF_SW_FLAG(i),
+					request[i]);
+		xe_mmio_read32(gt, MED_VF_SW_FLAG(LAST_INDEX));
+	} else {
+		for (i = 0; i < len; ++i)
+			xe_mmio_write32(gt, VF_SW_FLAG(i),
+					request[i]);
+		xe_mmio_read32(gt, VF_SW_FLAG(LAST_INDEX));
+	}
+
+	xe_guc_notify(guc);
+
+	ret = xe_mmio_wait32(gt, reply_reg, GUC_HXG_MSG_0_ORIGIN,
+			     FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC),
+			     50000, &reply, false);
+	if (ret) {
+timeout:
+		drm_err(&xe->drm, "mmio request %#x: no reply %#x\n",
+			request[0], reply);
+		return ret;
+	}
+
+	header = xe_mmio_read32(gt, reply_reg);
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+	    GUC_HXG_TYPE_NO_RESPONSE_BUSY) {
+		/*
+		 * Once we got a BUSY reply we must wait again for the final
+		 * response but this time we can't use ORIGIN mask anymore.
+		 * To spot a right change in the reply, we take advantage that
+		 * response SUCCESS and FAILURE differ only by the single bit
+		 * and all other bits are set and can be used as a new mask.
+		 */
+		u32 resp_bits = GUC_HXG_TYPE_RESPONSE_SUCCESS & GUC_HXG_TYPE_RESPONSE_FAILURE;
+		u32 resp_mask = FIELD_PREP(GUC_HXG_MSG_0_TYPE, resp_bits);
+
+		BUILD_BUG_ON(FIELD_MAX(GUC_HXG_MSG_0_TYPE) != GUC_HXG_TYPE_RESPONSE_SUCCESS);
+		BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1);
+
+		ret = xe_mmio_wait32(gt, reply_reg,  resp_mask, resp_mask,
+				     1000000, &header, false);
+
+		if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
+			     GUC_HXG_ORIGIN_GUC))
+			goto proto;
+		if (unlikely(ret))
+			goto timeout;
+	}
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+	    GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+		u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header);
+
+		drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %#x\n",
+			request[0], reason);
+		goto retry;
+	}
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+	    GUC_HXG_TYPE_RESPONSE_FAILURE) {
+		u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header);
+		u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header);
+
+		drm_err(&xe->drm, "mmio request %#x: failure %#x/%#x\n",
+			request[0], error, hint);
+		return -ENXIO;
+	}
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) !=
+	    GUC_HXG_TYPE_RESPONSE_SUCCESS) {
+proto:
+		drm_err(&xe->drm, "mmio request %#x: unexpected reply %#x\n",
+			request[0], header);
+		return -EPROTO;
+	}
+
+	/* Just copy entire possible message response */
+	if (response_buf) {
+		response_buf[0] = header;
+
+		for (i = 1; i < VF_SW_FLAG_COUNT; i++) {
+			reply_reg.addr += sizeof(u32);
+			response_buf[i] = xe_mmio_read32(gt, reply_reg);
+		}
+	}
+
+	/* Use data from the GuC response as our return value */
+	return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header);
+}
+
+int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len)
+{
+	return xe_guc_mmio_send_recv(guc, request, len, NULL);
+}
+
+static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
+			   GUC_ACTION_HOST2GUC_SELF_CFG),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) |
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32,
+			   lower_32_bits(val)),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64,
+			   upper_32_bits(val)),
+	};
+	int ret;
+
+	xe_assert(xe, len <= 2);
+	xe_assert(xe, len != 1 || !upper_32_bits(val));
+
+	/* Self config must go over MMIO */
+	ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request));
+
+	if (unlikely(ret < 0))
+		return ret;
+	if (unlikely(ret > 1))
+		return -EPROTO;
+	if (unlikely(!ret))
+		return -ENOKEY;
+
+	return 0;
+}
+
+int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val)
+{
+	return guc_self_cfg(guc, key, 1, val);
+}
+
+int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val)
+{
+	return guc_self_cfg(guc, key, 2, val);
+}
+
+void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir)
+{
+	if (iir & GUC_INTR_GUC2HOST)
+		xe_guc_ct_irq_handler(&guc->ct);
+}
+
+void xe_guc_sanitize(struct xe_guc *guc)
+{
+	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
+	xe_guc_ct_disable(&guc->ct);
+	guc->submission_state.enabled = false;
+}
+
+int xe_guc_reset_prepare(struct xe_guc *guc)
+{
+	return xe_guc_submit_reset_prepare(guc);
+}
+
+void xe_guc_reset_wait(struct xe_guc *guc)
+{
+	xe_guc_submit_reset_wait(guc);
+}
+
+void xe_guc_stop_prepare(struct xe_guc *guc)
+{
+	XE_WARN_ON(xe_guc_pc_stop(&guc->pc));
+}
+
+int xe_guc_stop(struct xe_guc *guc)
+{
+	int ret;
+
+	xe_guc_ct_disable(&guc->ct);
+
+	ret = xe_guc_submit_stop(guc);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int xe_guc_start(struct xe_guc *guc)
+{
+	int ret;
+
+	ret = xe_guc_pc_start(&guc->pc);
+	XE_WARN_ON(ret);
+
+	return xe_guc_submit_start(guc);
+}
+
+void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 status;
+	int err;
+	int i;
+
+	xe_uc_fw_print(&guc->fw, p);
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return;
+
+	status = xe_mmio_read32(gt, GUC_STATUS);
+
+	drm_printf(p, "\nGuC status 0x%08x:\n", status);
+	drm_printf(p, "\tBootrom status = 0x%x\n",
+		   REG_FIELD_GET(GS_BOOTROM_MASK, status));
+	drm_printf(p, "\tuKernel status = 0x%x\n",
+		   REG_FIELD_GET(GS_UKERNEL_MASK, status));
+	drm_printf(p, "\tMIA Core status = 0x%x\n",
+		   REG_FIELD_GET(GS_MIA_MASK, status));
+	drm_printf(p, "\tLog level = %d\n",
+		   xe_guc_log_get_level(&guc->log));
+
+	drm_puts(p, "\nScratch registers:\n");
+	for (i = 0; i < SOFT_SCRATCH_COUNT; i++) {
+		drm_printf(p, "\t%2d: \t0x%x\n",
+			   i, xe_mmio_read32(gt, SOFT_SCRATCH(i)));
+	}
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+
+	xe_guc_ct_print(&guc->ct, p, false);
+	xe_guc_submit_print(guc, p);
+}
+
+/**
+ * xe_guc_in_reset() - Detect if GuC MIA is in reset.
+ * @guc: The GuC object
+ *
+ * This function detects runtime resume from d3cold by leveraging
+ * GUC_STATUS, GUC doesn't get reset during d3hot,
+ * it strictly to be called from RPM resume handler.
+ *
+ * Return: true if failed to get forcewake or GuC MIA is in Reset,
+ * otherwise false.
+ */
+bool xe_guc_in_reset(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+	u32 status;
+	int err;
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return true;
+
+	status = xe_mmio_read32(gt, GUC_STATUS);
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+
+	return  status & GS_MIA_IN_RESET;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
new file mode 100644
index 000000000000..d3e49e7fd7c3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_H_
+#define _XE_GUC_H_
+
+#include "xe_gt.h"
+#include "xe_guc_types.h"
+#include "xe_hw_engine_types.h"
+#include "xe_macros.h"
+
+struct drm_printer;
+
+int xe_guc_init(struct xe_guc *guc);
+int xe_guc_init_post_hwconfig(struct xe_guc *guc);
+int xe_guc_post_load_init(struct xe_guc *guc);
+int xe_guc_reset(struct xe_guc *guc);
+int xe_guc_upload(struct xe_guc *guc);
+int xe_guc_min_load_for_hwconfig(struct xe_guc *guc);
+int xe_guc_enable_communication(struct xe_guc *guc);
+int xe_guc_suspend(struct xe_guc *guc);
+void xe_guc_notify(struct xe_guc *guc);
+int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr);
+int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len);
+int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, u32 len,
+			  u32 *response_buf);
+int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val);
+int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val);
+void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir);
+void xe_guc_sanitize(struct xe_guc *guc);
+void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p);
+int xe_guc_reset_prepare(struct xe_guc *guc);
+void xe_guc_reset_wait(struct xe_guc *guc);
+void xe_guc_stop_prepare(struct xe_guc *guc);
+int xe_guc_stop(struct xe_guc *guc);
+int xe_guc_start(struct xe_guc *guc);
+bool xe_guc_in_reset(struct xe_guc *guc);
+
+static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
+{
+	switch (class) {
+	case XE_ENGINE_CLASS_RENDER:
+		return GUC_RENDER_CLASS;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+		return GUC_VIDEO_CLASS;
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		return GUC_VIDEOENHANCE_CLASS;
+	case XE_ENGINE_CLASS_COPY:
+		return GUC_BLITTER_CLASS;
+	case XE_ENGINE_CLASS_COMPUTE:
+		return GUC_COMPUTE_CLASS;
+	case XE_ENGINE_CLASS_OTHER:
+		return GUC_GSC_OTHER_CLASS;
+	default:
+		XE_WARN_ON(class);
+		return -1;
+	}
+}
+
+static inline struct xe_gt *guc_to_gt(struct xe_guc *guc)
+{
+	return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static inline struct xe_device *guc_to_xe(struct xe_guc *guc)
+{
+	return gt_to_xe(guc_to_gt(guc));
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
new file mode 100644
index 000000000000..390e6f1bf4e1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_ads.h"
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_guc_regs.h"
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_gt_ccs_mode.h"
+#include "xe_guc.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+
+/* Slack of a few additional entries per engine */
+#define ADS_REGSET_EXTRA_MAX	8
+
+static struct xe_guc *
+ads_to_guc(struct xe_guc_ads *ads)
+{
+	return container_of(ads, struct xe_guc, ads);
+}
+
+static struct xe_gt *
+ads_to_gt(struct xe_guc_ads *ads)
+{
+	return container_of(ads, struct xe_gt, uc.guc.ads);
+}
+
+static struct xe_device *
+ads_to_xe(struct xe_guc_ads *ads)
+{
+	return gt_to_xe(ads_to_gt(ads));
+}
+
+static struct iosys_map *
+ads_to_map(struct xe_guc_ads *ads)
+{
+	return &ads->bo->vmap;
+}
+
+/* UM Queue parameters: */
+#define GUC_UM_QUEUE_SIZE       (SZ_64K)
+#define GUC_PAGE_RES_TIMEOUT_US (-1)
+
+/*
+ * The Additional Data Struct (ADS) has pointers for different buffers used by
+ * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
+ * all the extra buffers indirectly linked via the ADS struct's entries.
+ *
+ * Layout of the ADS blob allocated for the GuC:
+ *
+ *      +---------------------------------------+ <== base
+ *      | guc_ads                               |
+ *      +---------------------------------------+
+ *      | guc_policies                          |
+ *      +---------------------------------------+
+ *      | guc_gt_system_info                    |
+ *      +---------------------------------------+
+ *      | guc_engine_usage                      |
+ *      +---------------------------------------+
+ *      | guc_um_init_params                    |
+ *      +---------------------------------------+ <== static
+ *      | guc_mmio_reg[countA] (engine 0.0)     |
+ *      | guc_mmio_reg[countB] (engine 0.1)     |
+ *      | guc_mmio_reg[countC] (engine 1.0)     |
+ *      |   ...                                 |
+ *      +---------------------------------------+ <== dynamic
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | golden contexts                       |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | capture lists                         |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | UM queues                             |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | private data                          |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ */
+struct __guc_ads_blob {
+	struct guc_ads ads;
+	struct guc_policies policies;
+	struct guc_gt_system_info system_info;
+	struct guc_engine_usage engine_usage;
+	struct guc_um_init_params um_init_params;
+	/* From here on, location is dynamic! Refer to above diagram. */
+	struct guc_mmio_reg regset[0];
+} __packed;
+
+#define ads_blob_read(ads_, field_) \
+	xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \
+			struct __guc_ads_blob, field_)
+
+#define ads_blob_write(ads_, field_, val_)			\
+	xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0,	\
+			struct __guc_ads_blob, field_, val_)
+
+#define info_map_write(xe_, map_, field_, val_) \
+	xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_)
+
+#define info_map_read(xe_, map_, field_) \
+	xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_)
+
+static size_t guc_ads_regset_size(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+
+	xe_assert(xe, ads->regset_size);
+
+	return ads->regset_size;
+}
+
+static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads)
+{
+	return PAGE_ALIGN(ads->golden_lrc_size);
+}
+
+static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
+{
+	/* FIXME: Allocate a proper capture list */
+	return PAGE_ALIGN(PAGE_SIZE);
+}
+
+static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+
+	if (!xe->info.has_usm)
+		return 0;
+
+	return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX;
+}
+
+static size_t guc_ads_private_data_size(struct xe_guc_ads *ads)
+{
+	return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size);
+}
+
+static size_t guc_ads_regset_offset(struct xe_guc_ads *ads)
+{
+	return offsetof(struct __guc_ads_blob, regset);
+}
+
+static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads)
+{
+	size_t offset;
+
+	offset = guc_ads_regset_offset(ads) +
+		guc_ads_regset_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_capture_offset(struct xe_guc_ads *ads)
+{
+	size_t offset;
+
+	offset = guc_ads_golden_lrc_offset(ads) +
+		guc_ads_golden_lrc_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads)
+{
+	u32 offset;
+
+	offset = guc_ads_capture_offset(ads) +
+		 guc_ads_capture_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads)
+{
+	size_t offset;
+
+	offset = guc_ads_um_queues_offset(ads) +
+		guc_ads_um_queues_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_size(struct xe_guc_ads *ads)
+{
+	return guc_ads_private_data_offset(ads) +
+		guc_ads_private_data_size(ads);
+}
+
+static bool needs_wa_1607983814(struct xe_device *xe)
+{
+	return GRAPHICS_VERx100(xe) < 1250;
+}
+
+static size_t calculate_regset_size(struct xe_gt *gt)
+{
+	struct xe_reg_sr_entry *sr_entry;
+	unsigned long sr_idx;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	unsigned int count = 0;
+
+	for_each_hw_engine(hwe, gt, id)
+		xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry)
+			count++;
+
+	count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES;
+
+	if (needs_wa_1607983814(gt_to_xe(gt)))
+		count += LNCFCMOCS_REG_COUNT;
+
+	return count * sizeof(struct guc_mmio_reg);
+}
+
+static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 mask = 0;
+
+	for_each_hw_engine(hwe, gt, id)
+		if (hwe->class == class)
+			mask |= BIT(hwe->instance);
+
+	return mask;
+}
+
+static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	size_t total_size = 0, alloc_size, real_size;
+	int class;
+
+	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+		if (!engine_enable_mask(gt, class))
+			continue;
+
+		real_size = xe_lrc_size(xe, class);
+		alloc_size = PAGE_ALIGN(real_size);
+		total_size += alloc_size;
+	}
+
+	return total_size;
+}
+
+#define MAX_GOLDEN_LRC_SIZE	(SZ_4K * 64)
+
+int xe_guc_ads_init(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_bo *bo;
+
+	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+	ads->regset_size = calculate_regset_size(gt);
+
+	bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE,
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	ads->bo = bo;
+
+	return 0;
+}
+
+/**
+ * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
+ * @ads: Additional data structures object
+ *
+ * Recalcuate golden_lrc_size & regset_size as the number hardware engines may
+ * have changed after the hwconfig was loaded. Also verify the new sizes fit in
+ * the already allocated ADS buffer object.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
+{
+	struct xe_gt *gt = ads_to_gt(ads);
+	u32 prev_regset_size = ads->regset_size;
+
+	xe_gt_assert(gt, ads->bo);
+
+	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+	ads->regset_size = calculate_regset_size(gt);
+
+	xe_gt_assert(gt, ads->golden_lrc_size +
+		     (ads->regset_size - prev_regset_size) <=
+		     MAX_GOLDEN_LRC_SIZE);
+
+	return 0;
+}
+
+static void guc_policies_init(struct xe_guc_ads *ads)
+{
+	ads_blob_write(ads, policies.dpc_promote_time,
+		       GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
+	ads_blob_write(ads, policies.max_num_work_items,
+		       GLOBAL_POLICY_MAX_NUM_WI);
+	ads_blob_write(ads, policies.global_flags, 0);
+	ads_blob_write(ads, policies.is_valid, 1);
+}
+
+static void fill_engine_enable_masks(struct xe_gt *gt,
+				     struct iosys_map *info_map)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_COPY));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE));
+	info_map_write(xe, info_map,
+		       engine_enabled_masks[GUC_VIDEOENHANCE_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE));
+	info_map_write(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS],
+		       engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER));
+}
+
+static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	u8 guc_class;
+
+	for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) {
+		if (!info_map_read(xe, &info_map,
+				   engine_enabled_masks[guc_class]))
+			continue;
+
+		ads_blob_write(ads, ads.eng_state_size[guc_class],
+			       guc_ads_golden_lrc_size(ads) -
+			       xe_lrc_skip_size(xe));
+		ads_blob_write(ads, ads.golden_context_lrca[guc_class],
+			       xe_bo_ggtt_addr(ads->bo) +
+			       guc_ads_golden_lrc_offset(ads));
+	}
+}
+
+static void guc_mapping_table_init_invalid(struct xe_gt *gt,
+					   struct iosys_map *info_map)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	unsigned int i, j;
+
+	/* Table must be set to invalid values for entries not used */
+	for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
+		for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
+			info_map_write(xe, info_map, mapping_table[i][j],
+				       GUC_MAX_INSTANCES_PER_CLASS);
+}
+
+static void guc_mapping_table_init(struct xe_gt *gt,
+				   struct iosys_map *info_map)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	guc_mapping_table_init_invalid(gt, info_map);
+
+	for_each_hw_engine(hwe, gt, id) {
+		u8 guc_class;
+
+		guc_class = xe_engine_class_to_guc_class(hwe->class);
+		info_map_write(xe, info_map,
+			       mapping_table[guc_class][hwe->logical_instance],
+			       hwe->instance);
+	}
+}
+
+static void guc_capture_list_init(struct xe_guc_ads *ads)
+{
+	int i, j;
+	u32 addr = xe_bo_ggtt_addr(ads->bo) + guc_ads_capture_offset(ads);
+
+	/* FIXME: Populate a proper capture list */
+	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
+		for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
+			ads_blob_write(ads, ads.capture_instance[i][j], addr);
+			ads_blob_write(ads, ads.capture_class[i][j], addr);
+		}
+
+		ads_blob_write(ads, ads.capture_global[i], addr);
+	}
+}
+
+static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
+				      struct iosys_map *regset_map,
+				      struct xe_reg reg,
+				      unsigned int n_entry)
+{
+	struct guc_mmio_reg entry = {
+		.offset = reg.addr,
+		.flags = reg.masked ? GUC_REGSET_MASKED : 0,
+	};
+
+	xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
+			 &entry, sizeof(entry));
+}
+
+static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
+					  struct iosys_map *regset_map,
+					  struct xe_hw_engine *hwe)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_hw_engine *hwe_rcs_reset_domain =
+		xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
+	struct xe_reg_sr_entry *entry;
+	unsigned long idx;
+	unsigned int count = 0;
+	const struct {
+		struct xe_reg reg;
+		bool skip;
+	} *e, extra_regs[] = {
+		{ .reg = RING_MODE(hwe->mmio_base),			},
+		{ .reg = RING_HWS_PGA(hwe->mmio_base),			},
+		{ .reg = RING_IMR(hwe->mmio_base),			},
+		{ .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain	},
+		{ .reg = CCS_MODE,
+		  .skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) },
+	};
+	u32 i;
+
+	BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX);
+
+	xa_for_each(&hwe->reg_sr.xa, idx, entry)
+		guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++);
+
+	for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) {
+		if (e->skip)
+			continue;
+
+		guc_mmio_regset_write_one(ads, regset_map, e->reg, count++);
+	}
+
+	/* Wa_1607983814 */
+	if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) {
+		for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
+			guc_mmio_regset_write_one(ads, regset_map,
+						  XELP_LNCFCMOCS(i), count++);
+		}
+	}
+
+	return count;
+}
+
+static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
+{
+	size_t regset_offset = guc_ads_regset_offset(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset;
+	struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+							    regset_offset);
+	unsigned int regset_used = 0;
+
+	for_each_hw_engine(hwe, gt, id) {
+		unsigned int count;
+		u8 gc;
+
+		/*
+		 * 1. Write all MMIO entries for this exec queue to the table. No
+		 * need to worry about fused-off engines and when there are
+		 * entries in the regset: the reg_state_list has been zero'ed
+		 * by xe_guc_ads_populate()
+		 */
+		count = guc_mmio_regset_write(ads, &regset_map, hwe);
+		if (!count)
+			continue;
+
+		/*
+		 * 2. Record in the header (ads.reg_state_list) the address
+		 * location and number of entries
+		 */
+		gc = xe_engine_class_to_guc_class(hwe->class);
+		ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr);
+		ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count);
+
+		addr += count * sizeof(struct guc_mmio_reg);
+		iosys_map_incr(&regset_map, count * sizeof(struct guc_mmio_reg));
+
+		regset_used += count * sizeof(struct guc_mmio_reg);
+	}
+
+	xe_gt_assert(gt, regset_used <= ads->regset_size);
+}
+
+static void guc_um_init_params(struct xe_guc_ads *ads)
+{
+	u32 um_queue_offset = guc_ads_um_queues_offset(ads);
+	u64 base_dpa;
+	u32 base_ggtt;
+	int i;
+
+	base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset;
+	base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset;
+
+	for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) {
+		ads_blob_write(ads, um_init_params.queue_params[i].base_dpa,
+			       base_dpa + (i * GUC_UM_QUEUE_SIZE));
+		ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address,
+			       base_ggtt + (i * GUC_UM_QUEUE_SIZE));
+		ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes,
+			       GUC_UM_QUEUE_SIZE);
+	}
+
+	ads_blob_write(ads, um_init_params.page_response_timeout_in_us,
+		       GUC_PAGE_RES_TIMEOUT_US);
+}
+
+static void guc_doorbell_init(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+
+	if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
+		u32 distdbreg =
+			xe_mmio_read32(gt, DIST_DBS_POPULATED);
+
+		ads_blob_write(ads,
+			       system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
+			       REG_FIELD_GET(DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1);
+	}
+}
+
+/**
+ * xe_guc_ads_populate_minimal - populate minimal ADS
+ * @ads: Additional data structures object
+ *
+ * This function populates a minimal ADS that does not support submissions but
+ * enough so the GuC can load and the hwconfig table can be read.
+ */
+void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
+{
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	u32 base = xe_bo_ggtt_addr(ads->bo);
+
+	xe_gt_assert(gt, ads->bo);
+
+	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
+	guc_policies_init(ads);
+	guc_prep_golden_lrc_null(ads);
+	guc_mapping_table_init_invalid(gt, &info_map);
+	guc_doorbell_init(ads);
+
+	ads_blob_write(ads, ads.scheduler_policies, base +
+		       offsetof(struct __guc_ads_blob, policies));
+	ads_blob_write(ads, ads.gt_system_info, base +
+		       offsetof(struct __guc_ads_blob, system_info));
+	ads_blob_write(ads, ads.private_data, base +
+		       guc_ads_private_data_offset(ads));
+}
+
+void xe_guc_ads_populate(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	u32 base = xe_bo_ggtt_addr(ads->bo);
+
+	xe_gt_assert(gt, ads->bo);
+
+	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
+	guc_policies_init(ads);
+	fill_engine_enable_masks(gt, &info_map);
+	guc_mmio_reg_state_init(ads);
+	guc_prep_golden_lrc_null(ads);
+	guc_mapping_table_init(gt, &info_map);
+	guc_capture_list_init(ads);
+	guc_doorbell_init(ads);
+
+	if (xe->info.has_usm) {
+		guc_um_init_params(ads);
+		ads_blob_write(ads, ads.um_init_data, base +
+			       offsetof(struct __guc_ads_blob, um_init_params));
+	}
+
+	ads_blob_write(ads, ads.scheduler_policies, base +
+		       offsetof(struct __guc_ads_blob, policies));
+	ads_blob_write(ads, ads.gt_system_info, base +
+		       offsetof(struct __guc_ads_blob, system_info));
+	ads_blob_write(ads, ads.private_data, base +
+		       guc_ads_private_data_offset(ads));
+}
+
+static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
+{
+	struct xe_device *xe = ads_to_xe(ads);
+	struct xe_gt *gt = ads_to_gt(ads);
+	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+			offsetof(struct __guc_ads_blob, system_info));
+	size_t total_size = 0, alloc_size, real_size;
+	u32 addr_ggtt, offset;
+	int class;
+
+	offset = guc_ads_golden_lrc_offset(ads);
+	addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
+
+	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+		u8 guc_class;
+
+		guc_class = xe_engine_class_to_guc_class(class);
+
+		if (!info_map_read(xe, &info_map,
+				   engine_enabled_masks[guc_class]))
+			continue;
+
+		xe_gt_assert(gt, gt->default_lrc[class]);
+
+		real_size = xe_lrc_size(xe, class);
+		alloc_size = PAGE_ALIGN(real_size);
+		total_size += alloc_size;
+
+		/*
+		 * This interface is slightly confusing. We need to pass the
+		 * base address of the full golden context and the size of just
+		 * the engine state, which is the section of the context image
+		 * that starts after the execlists LRC registers. This is
+		 * required to allow the GuC to restore just the engine state
+		 * when a watchdog reset occurs.
+		 * We calculate the engine state size by removing the size of
+		 * what comes before it in the context image (which is identical
+		 * on all engines).
+		 */
+		ads_blob_write(ads, ads.eng_state_size[guc_class],
+			       real_size - xe_lrc_skip_size(xe));
+		ads_blob_write(ads, ads.golden_context_lrca[guc_class],
+			       addr_ggtt);
+
+		xe_map_memcpy_to(xe, ads_to_map(ads), offset,
+				 gt->default_lrc[class], real_size);
+
+		addr_ggtt += alloc_size;
+		offset += alloc_size;
+	}
+
+	xe_gt_assert(gt, total_size == ads->golden_lrc_size);
+}
+
+void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
+{
+	guc_populate_golden_lrc(ads);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h
new file mode 100644
index 000000000000..138ef6267671
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ADS_H_
+#define _XE_GUC_ADS_H_
+
+#include "xe_guc_ads_types.h"
+
+int xe_guc_ads_init(struct xe_guc_ads *ads);
+int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads);
+void xe_guc_ads_populate(struct xe_guc_ads *ads);
+void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads);
+void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h
new file mode 100644
index 000000000000..4afe44bece4b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ADS_TYPES_H_
+#define _XE_GUC_ADS_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/**
+ * struct xe_guc_ads - GuC additional data structures (ADS)
+ */
+struct xe_guc_ads {
+	/** @bo: XE BO for GuC ads blob */
+	struct xe_bo *bo;
+	/** @golden_lrc_size: golden LRC size */
+	size_t golden_lrc_size;
+	/** @regset_size: size of register set passed to GuC for save/restore */
+	u32 regset_size;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
new file mode 100644
index 000000000000..24a33fa36496
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -0,0 +1,1320 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_ct.h"
+
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_klvs_abi.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_guc.h"
+#include "xe_guc_submit.h"
+#include "xe_map.h"
+#include "xe_pm.h"
+#include "xe_trace.h"
+
+/* Used when a CT send wants to block and / or receive data */
+struct g2h_fence {
+	u32 *response_buffer;
+	u32 seqno;
+	u16 response_len;
+	u16 error;
+	u16 hint;
+	u16 reason;
+	bool retry;
+	bool fail;
+	bool done;
+};
+
+static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
+{
+	g2h_fence->response_buffer = response_buffer;
+	g2h_fence->response_len = 0;
+	g2h_fence->fail = false;
+	g2h_fence->retry = false;
+	g2h_fence->done = false;
+	g2h_fence->seqno = ~0x0;
+}
+
+static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
+{
+	return g2h_fence->seqno == ~0x0;
+}
+
+static struct xe_guc *
+ct_to_guc(struct xe_guc_ct *ct)
+{
+	return container_of(ct, struct xe_guc, ct);
+}
+
+static struct xe_gt *
+ct_to_gt(struct xe_guc_ct *ct)
+{
+	return container_of(ct, struct xe_gt, uc.guc.ct);
+}
+
+static struct xe_device *
+ct_to_xe(struct xe_guc_ct *ct)
+{
+	return gt_to_xe(ct_to_gt(ct));
+}
+
+/**
+ * DOC: GuC CTB Blob
+ *
+ * We allocate single blob to hold both CTB descriptors and buffers:
+ *
+ *      +--------+-----------------------------------------------+------+
+ *      | offset | contents                                      | size |
+ *      +========+===============================================+======+
+ *      | 0x0000 | H2G CTB Descriptor (send)                     |      |
+ *      +--------+-----------------------------------------------+  4K  |
+ *      | 0x0800 | G2H CTB Descriptor (g2h)                      |      |
+ *      +--------+-----------------------------------------------+------+
+ *      | 0x1000 | H2G CT Buffer (send)                          | n*4K |
+ *      |        |                                               |      |
+ *      +--------+-----------------------------------------------+------+
+ *      | 0x1000 | G2H CT Buffer (g2h)                           | m*4K |
+ *      | + n*4K |                                               |      |
+ *      +--------+-----------------------------------------------+------+
+ *
+ * Size of each ``CT Buffer`` must be multiple of 4K.
+ * We don't expect too many messages in flight at any time, unless we are
+ * using the GuC submission. In that case each request requires a minimum
+ * 2 dwords which gives us a maximum 256 queue'd requests. Hopefully this
+ * enough space to avoid backpressure on the driver. We increase the size
+ * of the receive buffer (relative to the send) to ensure a G2H response
+ * CTB has a landing spot.
+ */
+
+#define CTB_DESC_SIZE		ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
+#define CTB_H2G_BUFFER_SIZE	(SZ_4K)
+#define CTB_G2H_BUFFER_SIZE	(4 * CTB_H2G_BUFFER_SIZE)
+#define G2H_ROOM_BUFFER_SIZE	(CTB_G2H_BUFFER_SIZE / 4)
+
+static size_t guc_ct_size(void)
+{
+	return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE +
+		CTB_G2H_BUFFER_SIZE;
+}
+
+static void guc_ct_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_ct *ct = arg;
+
+	xa_destroy(&ct->fence_lookup);
+}
+
+static void g2h_worker_func(struct work_struct *w);
+
+static void primelockdep(struct xe_guc_ct *ct)
+{
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	fs_reclaim_acquire(GFP_KERNEL);
+	might_lock(&ct->lock);
+	fs_reclaim_release(GFP_KERNEL);
+}
+
+int xe_guc_ct_init(struct xe_guc_ct *ct)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_gt *gt = ct_to_gt(ct);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_bo *bo;
+	int err;
+
+	xe_assert(xe, !(guc_ct_size() % PAGE_SIZE));
+
+	drmm_mutex_init(&xe->drm, &ct->lock);
+	spin_lock_init(&ct->fast_lock);
+	xa_init(&ct->fence_lookup);
+	INIT_WORK(&ct->g2h_worker, g2h_worker_func);
+	init_waitqueue_head(&ct->wq);
+	init_waitqueue_head(&ct->g2h_fence_wq);
+
+	primelockdep(ct);
+
+	bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	ct->bo = bo;
+
+	err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+#define desc_read(xe_, guc_ctb__, field_)			\
+	xe_map_rd_field(xe_, &guc_ctb__->desc, 0,		\
+			struct guc_ct_buffer_desc, field_)
+
+#define desc_write(xe_, guc_ctb__, field_, val_)		\
+	xe_map_wr_field(xe_, &guc_ctb__->desc, 0,		\
+			struct guc_ct_buffer_desc, field_, val_)
+
+static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
+				struct iosys_map *map)
+{
+	h2g->info.size = CTB_H2G_BUFFER_SIZE / sizeof(u32);
+	h2g->info.resv_space = 0;
+	h2g->info.tail = 0;
+	h2g->info.head = 0;
+	h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
+				     h2g->info.size) -
+			  h2g->info.resv_space;
+	h2g->info.broken = false;
+
+	h2g->desc = *map;
+	xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+	h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2);
+}
+
+static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
+				struct iosys_map *map)
+{
+	g2h->info.size = CTB_G2H_BUFFER_SIZE / sizeof(u32);
+	g2h->info.resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32);
+	g2h->info.head = 0;
+	g2h->info.tail = 0;
+	g2h->info.space = CIRC_SPACE(g2h->info.tail, g2h->info.head,
+				     g2h->info.size) -
+			  g2h->info.resv_space;
+	g2h->info.broken = false;
+
+	g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE);
+	xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+	g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 +
+					    CTB_H2G_BUFFER_SIZE);
+}
+
+static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct)
+{
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 desc_addr, ctb_addr, size;
+	int err;
+
+	desc_addr = xe_bo_ggtt_addr(ct->bo);
+	ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2;
+	size = ct->ctbs.h2g.info.size * sizeof(u32);
+
+	err = xe_guc_self_cfg64(guc,
+				GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY,
+				desc_addr);
+	if (err)
+		return err;
+
+	err = xe_guc_self_cfg64(guc,
+				GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY,
+				ctb_addr);
+	if (err)
+		return err;
+
+	return xe_guc_self_cfg32(guc,
+				 GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY,
+				 size);
+}
+
+static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct)
+{
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 desc_addr, ctb_addr, size;
+	int err;
+
+	desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE;
+	ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 +
+		CTB_H2G_BUFFER_SIZE;
+	size = ct->ctbs.g2h.info.size * sizeof(u32);
+
+	err = xe_guc_self_cfg64(guc,
+				GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY,
+				desc_addr);
+	if (err)
+		return err;
+
+	err = xe_guc_self_cfg64(guc,
+				GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY,
+				ctb_addr);
+	if (err)
+		return err;
+
+	return xe_guc_self_cfg32(guc,
+				 GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY,
+				 size);
+}
+
+static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
+{
+	u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
+			   GUC_ACTION_HOST2GUC_CONTROL_CTB),
+		FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL,
+			   enable ? GUC_CTB_CONTROL_ENABLE :
+			   GUC_CTB_CONTROL_DISABLE),
+	};
+	int ret = xe_guc_mmio_send(ct_to_guc(ct), request, ARRAY_SIZE(request));
+
+	return ret > 0 ? -EPROTO : ret;
+}
+
+int xe_guc_ct_enable(struct xe_guc_ct *ct)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	int err;
+
+	xe_assert(xe, !ct->enabled);
+
+	guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
+	guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
+
+	err = guc_ct_ctb_h2g_register(ct);
+	if (err)
+		goto err_out;
+
+	err = guc_ct_ctb_g2h_register(ct);
+	if (err)
+		goto err_out;
+
+	err = guc_ct_control_toggle(ct, true);
+	if (err)
+		goto err_out;
+
+	mutex_lock(&ct->lock);
+	spin_lock_irq(&ct->fast_lock);
+	ct->g2h_outstanding = 0;
+	ct->enabled = true;
+	spin_unlock_irq(&ct->fast_lock);
+	mutex_unlock(&ct->lock);
+
+	smp_mb();
+	wake_up_all(&ct->wq);
+	drm_dbg(&xe->drm, "GuC CT communication channel enabled\n");
+
+	return 0;
+
+err_out:
+	drm_err(&xe->drm, "Failed to enable CT (%d)\n", err);
+
+	return err;
+}
+
+void xe_guc_ct_disable(struct xe_guc_ct *ct)
+{
+	mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */
+	spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */
+	ct->enabled = false; /* Finally disable CT communication */
+	spin_unlock_irq(&ct->fast_lock);
+	mutex_unlock(&ct->lock);
+
+	xa_destroy(&ct->fence_lookup);
+}
+
+static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
+{
+	struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+	lockdep_assert_held(&ct->lock);
+
+	if (cmd_len > h2g->info.space) {
+		h2g->info.head = desc_read(ct_to_xe(ct), h2g, head);
+		h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head,
+					     h2g->info.size) -
+				  h2g->info.resv_space;
+		if (cmd_len > h2g->info.space)
+			return false;
+	}
+
+	return true;
+}
+
+static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len)
+{
+	if (!g2h_len)
+		return true;
+
+	lockdep_assert_held(&ct->fast_lock);
+
+	return ct->ctbs.g2h.info.space > g2h_len;
+}
+
+static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len)
+{
+	lockdep_assert_held(&ct->lock);
+
+	if (!g2h_has_room(ct, g2h_len) || !h2g_has_room(ct, cmd_len))
+		return -EBUSY;
+
+	return 0;
+}
+
+static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
+{
+	lockdep_assert_held(&ct->lock);
+	ct->ctbs.h2g.info.space -= cmd_len;
+}
+
+static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
+{
+	xe_assert(ct_to_xe(ct), g2h_len <= ct->ctbs.g2h.info.space);
+
+	if (g2h_len) {
+		lockdep_assert_held(&ct->fast_lock);
+
+		ct->ctbs.g2h.info.space -= g2h_len;
+		ct->g2h_outstanding += num_g2h;
+	}
+}
+
+static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+	lockdep_assert_held(&ct->fast_lock);
+	xe_assert(ct_to_xe(ct), ct->ctbs.g2h.info.space + g2h_len <=
+		  ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
+
+	ct->ctbs.g2h.info.space += g2h_len;
+	--ct->g2h_outstanding;
+}
+
+static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+	spin_lock_irq(&ct->fast_lock);
+	__g2h_release_space(ct, g2h_len);
+	spin_unlock_irq(&ct->fast_lock);
+}
+
+#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */
+
+static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		     u32 ct_fence_value, bool want_response)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct guc_ctb *h2g = &ct->ctbs.h2g;
+	u32 cmd[H2G_CT_HEADERS];
+	u32 tail = h2g->info.tail;
+	u32 full_len;
+	struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
+							 tail * sizeof(u32));
+
+	full_len = len + GUC_CTB_HDR_LEN;
+
+	lockdep_assert_held(&ct->lock);
+	xe_assert(xe, full_len <= GUC_CTB_MSG_MAX_LEN);
+	xe_assert(xe, tail <= h2g->info.size);
+
+	/* Command will wrap, zero fill (NOPs), return and check credits again */
+	if (tail + full_len > h2g->info.size) {
+		xe_map_memset(xe, &map, 0, 0,
+			      (h2g->info.size - tail) * sizeof(u32));
+		h2g_reserve_space(ct, (h2g->info.size - tail));
+		h2g->info.tail = 0;
+		desc_write(xe, h2g, tail, h2g->info.tail);
+
+		return -EAGAIN;
+	}
+
+	/*
+	 * dw0: CT header (including fence)
+	 * dw1: HXG header (including action code)
+	 * dw2+: action data
+	 */
+	cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
+		FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
+		FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
+	if (want_response) {
+		cmd[1] =
+			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+	} else {
+		cmd[1] =
+			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+	}
+
+	/* H2G header in cmd[1] replaces action[0] so: */
+	--len;
+	++action;
+
+	/* Write H2G ensuring visable before descriptor update */
+	xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
+	xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32));
+	xe_device_wmb(xe);
+
+	/* Update local copies */
+	h2g->info.tail = (tail + full_len) % h2g->info.size;
+	h2g_reserve_space(ct, full_len);
+
+	/* Update descriptor */
+	desc_write(xe, h2g, tail, h2g->info.tail);
+
+	trace_xe_guc_ctb_h2g(ct_to_gt(ct)->info.id, *(action - 1), full_len,
+			     desc_read(xe, h2g, head), h2g->info.tail);
+
+	return 0;
+}
+
+static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
+				u32 len, u32 g2h_len, u32 num_g2h,
+				struct g2h_fence *g2h_fence)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	int ret;
+
+	xe_assert(xe, !g2h_len || !g2h_fence);
+	xe_assert(xe, !num_g2h || !g2h_fence);
+	xe_assert(xe, !g2h_len || num_g2h);
+	xe_assert(xe, g2h_len || !num_g2h);
+	lockdep_assert_held(&ct->lock);
+
+	if (unlikely(ct->ctbs.h2g.info.broken)) {
+		ret = -EPIPE;
+		goto out;
+	}
+
+	if (unlikely(!ct->enabled)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (g2h_fence) {
+		g2h_len = GUC_CTB_HXG_MSG_MAX_LEN;
+		num_g2h = 1;
+
+		if (g2h_fence_needs_alloc(g2h_fence)) {
+			void *ptr;
+
+			g2h_fence->seqno = (ct->fence_seqno++ & 0xffff);
+			ptr = xa_store(&ct->fence_lookup,
+				       g2h_fence->seqno,
+				       g2h_fence, GFP_ATOMIC);
+			if (IS_ERR(ptr)) {
+				ret = PTR_ERR(ptr);
+				goto out;
+			}
+		}
+	}
+
+	if (g2h_len)
+		spin_lock_irq(&ct->fast_lock);
+retry:
+	ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len);
+	if (unlikely(ret))
+		goto out_unlock;
+
+	ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0,
+			!!g2h_fence);
+	if (unlikely(ret)) {
+		if (ret == -EAGAIN)
+			goto retry;
+		goto out_unlock;
+	}
+
+	__g2h_reserve_space(ct, g2h_len, num_g2h);
+	xe_guc_notify(ct_to_guc(ct));
+out_unlock:
+	if (g2h_len)
+		spin_unlock_irq(&ct->fast_lock);
+out:
+	return ret;
+}
+
+static void kick_reset(struct xe_guc_ct *ct)
+{
+	xe_gt_reset_async(ct_to_gt(ct));
+}
+
+static int dequeue_one_g2h(struct xe_guc_ct *ct);
+
+static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			      u32 g2h_len, u32 num_g2h,
+			      struct g2h_fence *g2h_fence)
+{
+	struct drm_device *drm = &ct_to_xe(ct)->drm;
+	struct drm_printer p = drm_info_printer(drm->dev);
+	unsigned int sleep_period_ms = 1;
+	int ret;
+
+	xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence);
+	lockdep_assert_held(&ct->lock);
+	xe_device_assert_mem_access(ct_to_xe(ct));
+
+try_again:
+	ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
+				   g2h_fence);
+
+	/*
+	 * We wait to try to restore credits for about 1 second before bailing.
+	 * In the case of H2G credits we have no choice but just to wait for the
+	 * GuC to consume H2Gs in the channel so we use a wait / sleep loop. In
+	 * the case of G2H we process any G2H in the channel, hopefully freeing
+	 * credits as we consume the G2H messages.
+	 */
+	if (unlikely(ret == -EBUSY &&
+		     !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
+		struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+		if (sleep_period_ms == 1024)
+			goto broken;
+
+		trace_xe_guc_ct_h2g_flow_control(h2g->info.head, h2g->info.tail,
+						 h2g->info.size,
+						 h2g->info.space,
+						 len + GUC_CTB_HDR_LEN);
+		msleep(sleep_period_ms);
+		sleep_period_ms <<= 1;
+
+		goto try_again;
+	} else if (unlikely(ret == -EBUSY)) {
+		struct xe_device *xe = ct_to_xe(ct);
+		struct guc_ctb *g2h = &ct->ctbs.g2h;
+
+		trace_xe_guc_ct_g2h_flow_control(g2h->info.head,
+						 desc_read(xe, g2h, tail),
+						 g2h->info.size,
+						 g2h->info.space,
+						 g2h_fence ?
+						 GUC_CTB_HXG_MSG_MAX_LEN :
+						 g2h_len);
+
+#define g2h_avail(ct)	\
+	(desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.info.head)
+		if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding ||
+					g2h_avail(ct), HZ))
+			goto broken;
+#undef g2h_avail
+
+		if (dequeue_one_g2h(ct) < 0)
+			goto broken;
+
+		goto try_again;
+	}
+
+	return ret;
+
+broken:
+	drm_err(drm, "No forward process on H2G, reset required");
+	xe_guc_ct_print(ct, &p, true);
+	ct->ctbs.h2g.info.broken = true;
+
+	return -EDEADLK;
+}
+
+static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		       u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence)
+{
+	int ret;
+
+	xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence);
+
+	mutex_lock(&ct->lock);
+	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
+	mutex_unlock(&ct->lock);
+
+	return ret;
+}
+
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		   u32 g2h_len, u32 num_g2h)
+{
+	int ret;
+
+	ret = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL);
+	if (ret == -EDEADLK)
+		kick_reset(ct);
+
+	return ret;
+}
+
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			  u32 g2h_len, u32 num_g2h)
+{
+	int ret;
+
+	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL);
+	if (ret == -EDEADLK)
+		kick_reset(ct);
+
+	return ret;
+}
+
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+	int ret;
+
+	lockdep_assert_held(&ct->lock);
+
+	ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL);
+	if (ret == -EDEADLK)
+		kick_reset(ct);
+
+	return ret;
+}
+
+/*
+ * Check if a GT reset is in progress or will occur and if GT reset brought the
+ * CT back up. Randomly picking 5 seconds for an upper limit to do a GT a reset.
+ */
+static bool retry_failure(struct xe_guc_ct *ct, int ret)
+{
+	if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV))
+		return false;
+
+#define ct_alive(ct)	\
+	(ct->enabled && !ct->ctbs.h2g.info.broken && !ct->ctbs.g2h.info.broken)
+	if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct),  HZ * 5))
+		return false;
+#undef ct_alive
+
+	return true;
+}
+
+static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			    u32 *response_buffer, bool no_fail)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct g2h_fence g2h_fence;
+	int ret = 0;
+
+	/*
+	 * We use a fence to implement blocking sends / receiving response data.
+	 * The seqno of the fence is sent in the H2G, returned in the G2H, and
+	 * an xarray is used as storage media with the seqno being to key.
+	 * Fields in the fence hold success, failure, retry status and the
+	 * response data. Safe to allocate on the stack as the xarray is the
+	 * only reference and it cannot be present after this function exits.
+	 */
+retry:
+	g2h_fence_init(&g2h_fence, response_buffer);
+retry_same_fence:
+	ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
+	if (unlikely(ret == -ENOMEM)) {
+		void *ptr;
+
+		/* Retry allocation /w GFP_KERNEL */
+		ptr = xa_store(&ct->fence_lookup,
+			       g2h_fence.seqno,
+			       &g2h_fence, GFP_KERNEL);
+		if (IS_ERR(ptr))
+			return PTR_ERR(ptr);
+
+		goto retry_same_fence;
+	} else if (unlikely(ret)) {
+		if (ret == -EDEADLK)
+			kick_reset(ct);
+
+		if (no_fail && retry_failure(ct, ret))
+			goto retry_same_fence;
+
+		if (!g2h_fence_needs_alloc(&g2h_fence))
+			xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+
+		return ret;
+	}
+
+	ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+	if (!ret) {
+		drm_err(&xe->drm, "Timed out wait for G2H, fence %u, action %04x",
+			g2h_fence.seqno, action[0]);
+		xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+		return -ETIME;
+	}
+
+	if (g2h_fence.retry) {
+		drm_warn(&xe->drm, "Send retry, action 0x%04x, reason %d",
+			 action[0], g2h_fence.reason);
+		goto retry;
+	}
+	if (g2h_fence.fail) {
+		drm_err(&xe->drm, "Send failed, action 0x%04x, error %d, hint %d",
+			action[0], g2h_fence.error, g2h_fence.hint);
+		ret = -EIO;
+	}
+
+	return ret > 0 ? 0 : ret;
+}
+
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			u32 *response_buffer)
+{
+	return guc_ct_send_recv(ct, action, len, response_buffer, false);
+}
+
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+				u32 len, u32 *response_buffer)
+{
+	return guc_ct_send_recv(ct, action, len, response_buffer, true);
+}
+
+static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+
+	lockdep_assert_held(&ct->lock);
+
+	switch (action) {
+	case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+	case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+	case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		g2h_release_space(ct, len);
+	}
+
+	return 0;
+}
+
+static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	u32 response_len = len - GUC_CTB_MSG_MIN_LEN;
+	u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]);
+	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]);
+	struct g2h_fence *g2h_fence;
+
+	lockdep_assert_held(&ct->lock);
+
+	g2h_fence = xa_erase(&ct->fence_lookup, fence);
+	if (unlikely(!g2h_fence)) {
+		/* Don't tear down channel, as send could've timed out */
+		drm_warn(&xe->drm, "G2H fence (%u) not found!\n", fence);
+		g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+		return 0;
+	}
+
+	xe_assert(xe, fence == g2h_fence->seqno);
+
+	if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
+		g2h_fence->fail = true;
+		g2h_fence->error =
+			FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[1]);
+		g2h_fence->hint =
+			FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[1]);
+	} else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+		g2h_fence->retry = true;
+		g2h_fence->reason =
+			FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[1]);
+	} else if (g2h_fence->response_buffer) {
+		g2h_fence->response_len = response_len;
+		memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN,
+		       response_len * sizeof(u32));
+	}
+
+	g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+
+	g2h_fence->done = true;
+	smp_mb();
+
+	wake_up_all(&ct->g2h_fence_wq);
+
+	return 0;
+}
+
+static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	u32 hxg, origin, type;
+	int ret;
+
+	lockdep_assert_held(&ct->lock);
+
+	hxg = msg[1];
+
+	origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg);
+	if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
+		drm_err(&xe->drm,
+			"G2H channel broken on read, origin=%d, reset required\n",
+			origin);
+		ct->ctbs.g2h.info.broken = true;
+
+		return -EPROTO;
+	}
+
+	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg);
+	switch (type) {
+	case GUC_HXG_TYPE_EVENT:
+		ret = parse_g2h_event(ct, msg, len);
+		break;
+	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+	case GUC_HXG_TYPE_RESPONSE_FAILURE:
+	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+		ret = parse_g2h_response(ct, msg, len);
+		break;
+	default:
+		drm_err(&xe->drm,
+			"G2H channel broken on read, type=%d, reset required\n",
+			type);
+		ct->ctbs.g2h.info.broken = true;
+
+		ret = -EOPNOTSUPP;
+	}
+
+	return ret;
+}
+
+static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+	u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN;
+	u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+	int ret = 0;
+
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+		return 0;
+
+	switch (action) {
+	case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+		ret = xe_guc_sched_done_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+		ret = xe_guc_deregister_done_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
+		ret = xe_guc_exec_queue_reset_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
+		ret = xe_guc_exec_queue_reset_failure_handler(guc, payload,
+							      adj_len);
+		break;
+	case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+		/* Selftest only at the moment */
+		break;
+	case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+	case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
+		/* FIXME: Handle this */
+		break;
+	case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
+		ret = xe_guc_exec_queue_memory_cat_error_handler(guc, payload,
+								 adj_len);
+		break;
+	case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+		ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+							   adj_len);
+		break;
+	case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY:
+		ret = xe_guc_access_counter_notify_handler(guc, payload,
+							   adj_len);
+		break;
+	default:
+		drm_err(&xe->drm, "unexpected action 0x%04x\n", action);
+	}
+
+	if (ret)
+		drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
+			action, ret);
+
+	return 0;
+}
+
+static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct guc_ctb *g2h = &ct->ctbs.g2h;
+	u32 tail, head, len;
+	s32 avail;
+	u32 action;
+
+	lockdep_assert_held(&ct->fast_lock);
+
+	if (!ct->enabled)
+		return -ENODEV;
+
+	if (g2h->info.broken)
+		return -EPIPE;
+
+	/* Calculate DW available to read */
+	tail = desc_read(xe, g2h, tail);
+	avail = tail - g2h->info.head;
+	if (unlikely(avail == 0))
+		return 0;
+
+	if (avail < 0)
+		avail += g2h->info.size;
+
+	/* Read header */
+	xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->info.head,
+			   sizeof(u32));
+	len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
+	if (len > avail) {
+		drm_err(&xe->drm,
+			"G2H channel broken on read, avail=%d, len=%d, reset required\n",
+			avail, len);
+		g2h->info.broken = true;
+
+		return -EPROTO;
+	}
+
+	head = (g2h->info.head + 1) % g2h->info.size;
+	avail = len - 1;
+
+	/* Read G2H message */
+	if (avail + head > g2h->info.size) {
+		u32 avail_til_wrap = g2h->info.size - head;
+
+		xe_map_memcpy_from(xe, msg + 1,
+				   &g2h->cmds, sizeof(u32) * head,
+				   avail_til_wrap * sizeof(u32));
+		xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap,
+				   &g2h->cmds, 0,
+				   (avail - avail_til_wrap) * sizeof(u32));
+	} else {
+		xe_map_memcpy_from(xe, msg + 1,
+				   &g2h->cmds, sizeof(u32) * head,
+				   avail * sizeof(u32));
+	}
+
+	action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+
+	if (fast_path) {
+		if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+			return 0;
+
+		switch (action) {
+		case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+		case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+			break;	/* Process these in fast-path */
+		default:
+			return 0;
+		}
+	}
+
+	/* Update local / descriptor header */
+	g2h->info.head = (head + avail) % g2h->info.size;
+	desc_write(xe, g2h, head, g2h->info.head);
+
+	trace_xe_guc_ctb_g2h(ct_to_gt(ct)->info.id, action, len,
+			     g2h->info.head, tail);
+
+	return len;
+}
+
+static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_guc *guc = ct_to_guc(ct);
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+	u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN;
+	u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+	int ret = 0;
+
+	switch (action) {
+	case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+		ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+		break;
+	case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+		__g2h_release_space(ct, len);
+		ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+							   adj_len);
+		break;
+	default:
+		drm_warn(&xe->drm, "NOT_POSSIBLE");
+	}
+
+	if (ret)
+		drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
+			action, ret);
+}
+
+/**
+ * xe_guc_ct_fast_path - process critical G2H in the IRQ handler
+ * @ct: GuC CT object
+ *
+ * Anything related to page faults is critical for performance, process these
+ * critical G2H in the IRQ. This is safe as these handlers either just wake up
+ * waiters or queue another worker.
+ */
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	bool ongoing;
+	int len;
+
+	ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct));
+	if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
+		return;
+
+	spin_lock(&ct->fast_lock);
+	do {
+		len = g2h_read(ct, ct->fast_msg, true);
+		if (len > 0)
+			g2h_fast_path(ct, ct->fast_msg, len);
+	} while (len > 0);
+	spin_unlock(&ct->fast_lock);
+
+	if (ongoing)
+		xe_device_mem_access_put(xe);
+}
+
+/* Returns less than zero on error, 0 on done, 1 on more available */
+static int dequeue_one_g2h(struct xe_guc_ct *ct)
+{
+	int len;
+	int ret;
+
+	lockdep_assert_held(&ct->lock);
+
+	spin_lock_irq(&ct->fast_lock);
+	len = g2h_read(ct, ct->msg, false);
+	spin_unlock_irq(&ct->fast_lock);
+	if (len <= 0)
+		return len;
+
+	ret = parse_g2h_msg(ct, ct->msg, len);
+	if (unlikely(ret < 0))
+		return ret;
+
+	ret = process_g2h_msg(ct, ct->msg, len);
+	if (unlikely(ret < 0))
+		return ret;
+
+	return 1;
+}
+
+static void g2h_worker_func(struct work_struct *w)
+{
+	struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker);
+	bool ongoing;
+	int ret;
+
+	/*
+	 * Normal users must always hold mem_access.ref around CT calls. However
+	 * during the runtime pm callbacks we rely on CT to talk to the GuC, but
+	 * at this stage we can't rely on mem_access.ref and even the
+	 * callback_task will be different than current.  For such cases we just
+	 * need to ensure we always process the responses from any blocking
+	 * ct_send requests or where we otherwise expect some response when
+	 * initiated from those callbacks (which will need to wait for the below
+	 * dequeue_one_g2h()).  The dequeue_one_g2h() will gracefully fail if
+	 * the device has suspended to the point that the CT communication has
+	 * been disabled.
+	 *
+	 * If we are inside the runtime pm callback, we can be the only task
+	 * still issuing CT requests (since that requires having the
+	 * mem_access.ref).  It seems like it might in theory be possible to
+	 * receive unsolicited events from the GuC just as we are
+	 * suspending-resuming, but those will currently anyway be lost when
+	 * eventually exiting from suspend, hence no need to wake up the device
+	 * here. If we ever need something stronger than get_if_ongoing() then
+	 * we need to be careful with blocking the pm callbacks from getting CT
+	 * responses, if the worker here is blocked on those callbacks
+	 * completing, creating a deadlock.
+	 */
+	ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct));
+	if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
+		return;
+
+	do {
+		mutex_lock(&ct->lock);
+		ret = dequeue_one_g2h(ct);
+		mutex_unlock(&ct->lock);
+
+		if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
+			struct drm_device *drm = &ct_to_xe(ct)->drm;
+			struct drm_printer p = drm_info_printer(drm->dev);
+
+			xe_guc_ct_print(ct, &p, false);
+			kick_reset(ct);
+		}
+	} while (ret == 1);
+
+	if (ongoing)
+		xe_device_mem_access_put(ct_to_xe(ct));
+}
+
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+				     struct guc_ctb_snapshot *snapshot,
+				     bool atomic)
+{
+	u32 head, tail;
+
+	xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+			   sizeof(struct guc_ct_buffer_desc));
+	memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
+
+	snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32),
+				       atomic ? GFP_ATOMIC : GFP_KERNEL);
+
+	if (!snapshot->cmds) {
+		drm_err(&xe->drm, "Skipping CTB commands snapshot. Only CTB info will be available.\n");
+		return;
+	}
+
+	head = snapshot->desc.head;
+	tail = snapshot->desc.tail;
+
+	if (head != tail) {
+		struct iosys_map map =
+			IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
+
+		while (head != tail) {
+			snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32);
+			++head;
+			if (head == ctb->info.size) {
+				head = 0;
+				map = ctb->cmds;
+			} else {
+				iosys_map_incr(&map, sizeof(u32));
+			}
+		}
+	}
+}
+
+static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
+				   struct drm_printer *p)
+{
+	u32 head, tail;
+
+	drm_printf(p, "\tsize: %d\n", snapshot->info.size);
+	drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space);
+	drm_printf(p, "\thead: %d\n", snapshot->info.head);
+	drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
+	drm_printf(p, "\tspace: %d\n", snapshot->info.space);
+	drm_printf(p, "\tbroken: %d\n", snapshot->info.broken);
+	drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
+	drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
+	drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
+
+	if (!snapshot->cmds)
+		return;
+
+	head = snapshot->desc.head;
+	tail = snapshot->desc.tail;
+
+	while (head != tail) {
+		drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
+			   snapshot->cmds[head]);
+		++head;
+		if (head == snapshot->info.size)
+			head = 0;
+	}
+}
+
+static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
+{
+	kfree(snapshot->cmds);
+}
+
+/**
+ * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
+ * @ct: GuC CT object.
+ * @atomic: Boolean to indicate if this is called from atomic context like
+ * reset or CTB handler or from some regular path like debugfs.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * Returns: a GuC CT snapshot object that must be freed by the caller
+ * by using `xe_guc_ct_snapshot_free`.
+ */
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
+						      bool atomic)
+{
+	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_guc_ct_snapshot *snapshot;
+
+	snapshot = kzalloc(sizeof(*snapshot),
+			   atomic ? GFP_ATOMIC : GFP_KERNEL);
+
+	if (!snapshot) {
+		drm_err(&xe->drm, "Skipping CTB snapshot entirely.\n");
+		return NULL;
+	}
+
+	if (ct->enabled) {
+		snapshot->ct_enabled = true;
+		snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
+		guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
+					 &snapshot->h2g, atomic);
+		guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h,
+					 &snapshot->g2h, atomic);
+	}
+
+	return snapshot;
+}
+
+/**
+ * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
+ * @snapshot: GuC CT snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given GuC CT snapshot object.
+ */
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+			      struct drm_printer *p)
+{
+	if (!snapshot)
+		return;
+
+	if (snapshot->ct_enabled) {
+		drm_puts(p, "\nH2G CTB (all sizes in DW):\n");
+		guc_ctb_snapshot_print(&snapshot->h2g, p);
+
+		drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
+		guc_ctb_snapshot_print(&snapshot->g2h, p);
+
+		drm_printf(p, "\tg2h outstanding: %d\n",
+			   snapshot->g2h_outstanding);
+	} else {
+		drm_puts(p, "\nCT disabled\n");
+	}
+}
+
+/**
+ * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: GuC CT snapshot object.
+ *
+ * This function free all the memory that needed to be allocated at capture
+ * time.
+ */
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
+{
+	if (!snapshot)
+		return;
+
+	guc_ctb_snapshot_free(&snapshot->h2g);
+	guc_ctb_snapshot_free(&snapshot->g2h);
+	kfree(snapshot);
+}
+
+/**
+ * xe_guc_ct_print - GuC CT Print.
+ * @ct: GuC CT.
+ * @p: drm_printer where it will be printed out.
+ * @atomic: Boolean to indicate if this is called from atomic context like
+ * reset or CTB handler or from some regular path like debugfs.
+ *
+ * This function quickly capture a snapshot and immediately print it out.
+ */
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic)
+{
+	struct xe_guc_ct_snapshot *snapshot;
+
+	snapshot = xe_guc_ct_snapshot_capture(ct, atomic);
+	xe_guc_ct_snapshot_print(snapshot, p);
+	xe_guc_ct_snapshot_free(snapshot);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
new file mode 100644
index 000000000000..f15f8a4857e0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CT_H_
+#define _XE_GUC_CT_H_
+
+#include "xe_guc_ct_types.h"
+
+struct drm_printer;
+
+int xe_guc_ct_init(struct xe_guc_ct *ct);
+int xe_guc_ct_enable(struct xe_guc_ct *ct);
+void xe_guc_ct_disable(struct xe_guc_ct *ct);
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
+
+struct xe_guc_ct_snapshot *
+xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+			      struct drm_printer *p);
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
+
+static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
+{
+	wake_up_all(&ct->wq);
+	if (ct->enabled)
+		queue_work(system_unbound_wq, &ct->g2h_worker);
+	xe_guc_ct_fast_path(ct);
+}
+
+/* Basic CT send / receives */
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+		   u32 g2h_len, u32 num_g2h);
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			  u32 g2h_len, u32 num_g2h);
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+			u32 *response_buffer);
+static inline int
+xe_guc_ct_send_block(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+	return xe_guc_ct_send_recv(ct, action, len, NULL);
+}
+
+/* This is only version of the send CT you can call from a G2H handler */
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action,
+			       u32 len);
+
+/* Can't fail because a GT reset is in progress */
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+				u32 len, u32 *response_buffer);
+static inline int
+xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+	return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
new file mode 100644
index 000000000000..d814d4ee3fc6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CT_TYPES_H_
+#define _XE_GUC_CT_TYPES_H_
+
+#include <linux/interrupt.h>
+#include <linux/iosys-map.h>
+#include <linux/spinlock_types.h>
+#include <linux/wait.h>
+#include <linux/xarray.h>
+
+#include "abi/guc_communication_ctb_abi.h"
+
+struct xe_bo;
+
+/**
+ * struct guc_ctb_info - GuC command transport buffer (CTB) info
+ */
+struct guc_ctb_info {
+	/** @size: size of CTB commands (DW) */
+	u32 size;
+	/** @resv_space: reserved space of CTB commands (DW) */
+	u32 resv_space;
+	/** @head: head of CTB commands (DW) */
+	u32 head;
+	/** @tail: tail of CTB commands (DW) */
+	u32 tail;
+	/** @space: space in CTB commands (DW) */
+	u32 space;
+	/** @broken: channel broken */
+	bool broken;
+};
+
+/**
+ * struct guc_ctb - GuC command transport buffer (CTB)
+ */
+struct guc_ctb {
+	/** @desc: dma buffer map for CTB descriptor */
+	struct iosys_map desc;
+	/** @cmds: dma buffer map for CTB commands */
+	struct iosys_map cmds;
+	/** @info: CTB info */
+	struct guc_ctb_info info;
+};
+
+/**
+ * struct guc_ctb_snapshot - GuC command transport buffer (CTB) snapshot
+ */
+struct guc_ctb_snapshot {
+	/** @desc: snapshot of the CTB descriptor */
+	struct guc_ct_buffer_desc desc;
+	/** @cmds: snapshot of the CTB commands */
+	u32 *cmds;
+	/** @info: snapshot of the CTB info */
+	struct guc_ctb_info info;
+};
+
+/**
+ * struct xe_guc_ct_snapshot - GuC command transport (CT) snapshot
+ */
+struct xe_guc_ct_snapshot {
+	/** @ct_enabled: CT enabled info at capture time. */
+	bool ct_enabled;
+	/** @g2h_outstanding: G2H outstanding info at the capture time */
+	u32 g2h_outstanding;
+	/** @g2h: G2H CTB snapshot */
+	struct guc_ctb_snapshot g2h;
+	/** @h2g: H2G CTB snapshot */
+	struct guc_ctb_snapshot h2g;
+};
+
+/**
+ * struct xe_guc_ct - GuC command transport (CT) layer
+ *
+ * Includes a pair of CT buffers for bi-directional communication and tracking
+ * for the H2G and G2H requests sent and received through the buffers.
+ */
+struct xe_guc_ct {
+	/** @bo: XE BO for CT */
+	struct xe_bo *bo;
+	/** @lock: protects everything in CT layer */
+	struct mutex lock;
+	/** @fast_lock: protects G2H channel and credits */
+	spinlock_t fast_lock;
+	/** @ctbs: buffers for sending and receiving commands */
+	struct {
+		/** @send: Host to GuC (H2G, send) channel */
+		struct guc_ctb h2g;
+		/** @recv: GuC to Host (G2H, receive) channel */
+		struct guc_ctb g2h;
+	} ctbs;
+	/** @g2h_outstanding: number of outstanding G2H */
+	u32 g2h_outstanding;
+	/** @g2h_worker: worker to process G2H messages */
+	struct work_struct g2h_worker;
+	/** @enabled: CT enabled */
+	bool enabled;
+	/** @fence_seqno: G2H fence seqno - 16 bits used by CT */
+	u32 fence_seqno;
+	/** @fence_lookup: G2H fence lookup */
+	struct xarray fence_lookup;
+	/** @wq: wait queue used for reliable CT sends and freeing G2H credits */
+	wait_queue_head_t wq;
+	/** @g2h_fence_wq: wait queue used for G2H fencing */
+	wait_queue_head_t g2h_fence_wq;
+	/** @msg: Message buffer */
+	u32 msg[GUC_CTB_MSG_MAX_LEN];
+	/** @fast_msg: Message buffer */
+	u32 fast_msg[GUC_CTB_MSG_MAX_LEN];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
new file mode 100644
index 000000000000..ffd7d53bcc42
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_debugfs.h"
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_log.h"
+#include "xe_macros.h"
+
+static struct xe_guc *node_to_guc(struct drm_info_node *node)
+{
+	return node->info_ent->data;
+}
+
+static int guc_info(struct seq_file *m, void *data)
+{
+	struct xe_guc *guc = node_to_guc(m->private);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_device_mem_access_get(xe);
+	xe_guc_print_info(guc, &p);
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+static int guc_log(struct seq_file *m, void *data)
+{
+	struct xe_guc *guc = node_to_guc(m->private);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_device_mem_access_get(xe);
+	xe_guc_log_print(&guc->log, &p);
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+	{"guc_info", guc_info, 0},
+	{"guc_log", guc_log, 0},
+};
+
+void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
+{
+	struct drm_minor *minor = guc_to_xe(guc)->drm.primary;
+	struct drm_info_list *local;
+	int i;
+
+#define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
+	local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
+	if (!local)
+		return;
+
+	memcpy(local, debugfs_list, DEBUGFS_SIZE);
+#undef DEBUGFS_SIZE
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+		local[i].data = guc;
+
+	drm_debugfs_create_files(local,
+				 ARRAY_SIZE(debugfs_list),
+				 parent, minor);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.h b/drivers/gpu/drm/xe/xe_guc_debugfs.h
new file mode 100644
index 000000000000..4756dff26fca
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_DEBUGFS_H_
+#define _XE_GUC_DEBUGFS_H_
+
+struct dentry;
+struct xe_guc;
+
+void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
new file mode 100644
index 000000000000..4c39f01e4f52
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ENGINE_TYPES_H_
+#define _XE_GUC_ENGINE_TYPES_H_
+
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "xe_gpu_scheduler_types.h"
+
+struct dma_fence;
+struct xe_exec_queue;
+
+/**
+ * struct xe_guc_exec_queue - GuC specific state for an xe_exec_queue
+ */
+struct xe_guc_exec_queue {
+	/** @q: Backpointer to parent xe_exec_queue */
+	struct xe_exec_queue *q;
+	/** @sched: GPU scheduler for this xe_exec_queue */
+	struct xe_gpu_scheduler sched;
+	/** @entity: Scheduler entity for this xe_exec_queue */
+	struct xe_sched_entity entity;
+	/**
+	 * @static_msgs: Static messages for this xe_exec_queue, used when
+	 * a message needs to sent through the GPU scheduler but memory
+	 * allocations are not allowed.
+	 */
+#define MAX_STATIC_MSG_TYPE	3
+	struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE];
+	/** @lr_tdr: long running TDR worker */
+	struct work_struct lr_tdr;
+	/** @fini_async: do final fini async from this worker */
+	struct work_struct fini_async;
+	/** @resume_time: time of last resume */
+	u64 resume_time;
+	/** @state: GuC specific state for this xe_exec_queue */
+	atomic_t state;
+	/** @wqi_head: work queue item tail */
+	u32 wqi_head;
+	/** @wqi_tail: work queue item tail */
+	u32 wqi_tail;
+	/** @id: GuC id for this exec_queue */
+	u16 id;
+	/** @suspend_wait: wait queue used to wait on pending suspends */
+	wait_queue_head_t suspend_wait;
+	/** @suspend_pending: a suspend of the exec_queue is pending */
+	bool suspend_pending;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
new file mode 100644
index 000000000000..4dd5a88a7826
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_FWIF_H
+#define _XE_GUC_FWIF_H
+
+#include <linux/bits.h>
+
+#include "abi/guc_klvs_abi.h"
+
+#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET	4
+#define G2H_LEN_DW_DEREGISTER_CONTEXT		3
+#define G2H_LEN_DW_TLB_INVALIDATE		3
+
+#define GUC_CONTEXT_DISABLE		0
+#define GUC_CONTEXT_ENABLE		1
+
+#define GUC_CLIENT_PRIORITY_KMD_HIGH	0
+#define GUC_CLIENT_PRIORITY_HIGH	1
+#define GUC_CLIENT_PRIORITY_KMD_NORMAL	2
+#define GUC_CLIENT_PRIORITY_NORMAL	3
+#define GUC_CLIENT_PRIORITY_NUM		4
+
+#define GUC_RENDER_ENGINE		0
+#define GUC_VIDEO_ENGINE		1
+#define GUC_BLITTER_ENGINE		2
+#define GUC_VIDEOENHANCE_ENGINE		3
+#define GUC_VIDEO_ENGINE2		4
+#define GUC_MAX_ENGINES_NUM		(GUC_VIDEO_ENGINE2 + 1)
+
+#define GUC_RENDER_CLASS		0
+#define GUC_VIDEO_CLASS			1
+#define GUC_VIDEOENHANCE_CLASS		2
+#define GUC_BLITTER_CLASS		3
+#define GUC_COMPUTE_CLASS		4
+#define GUC_GSC_OTHER_CLASS		5
+#define GUC_LAST_ENGINE_CLASS		GUC_GSC_OTHER_CLASS
+#define GUC_MAX_ENGINE_CLASSES		16
+#define GUC_MAX_INSTANCES_PER_CLASS	32
+
+/* Helper for context registration H2G */
+struct guc_ctxt_registration_info {
+	u32 flags;
+	u32 context_idx;
+	u32 engine_class;
+	u32 engine_submit_mask;
+	u32 wq_desc_lo;
+	u32 wq_desc_hi;
+	u32 wq_base_lo;
+	u32 wq_base_hi;
+	u32 wq_size;
+	u32 hwlrca_lo;
+	u32 hwlrca_hi;
+};
+#define CONTEXT_REGISTRATION_FLAG_KMD	BIT(0)
+
+/* 32-bit KLV structure as used by policy updates and others */
+struct guc_klv_generic_dw_t {
+	u32 kl;
+	u32 value;
+} __packed;
+
+/* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */
+struct guc_update_exec_queue_policy_header {
+	u32 action;
+	u32 guc_id;
+} __packed;
+
+struct guc_update_exec_queue_policy {
+	struct guc_update_exec_queue_policy_header header;
+	struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
+} __packed;
+
+/* GUC_CTL_* - Parameters for loading the GuC */
+#define GUC_CTL_LOG_PARAMS		0
+#define   GUC_LOG_VALID			BIT(0)
+#define   GUC_LOG_NOTIFY_ON_HALF_FULL	BIT(1)
+#define   GUC_LOG_CAPTURE_ALLOC_UNITS	BIT(2)
+#define   GUC_LOG_LOG_ALLOC_UNITS	BIT(3)
+#define   GUC_LOG_CRASH_SHIFT		4
+#define   GUC_LOG_CRASH_MASK		(0x3 << GUC_LOG_CRASH_SHIFT)
+#define   GUC_LOG_DEBUG_SHIFT		6
+#define   GUC_LOG_DEBUG_MASK	        (0xF << GUC_LOG_DEBUG_SHIFT)
+#define   GUC_LOG_CAPTURE_SHIFT		10
+#define   GUC_LOG_CAPTURE_MASK	        (0x3 << GUC_LOG_CAPTURE_SHIFT)
+#define   GUC_LOG_BUF_ADDR_SHIFT	12
+
+#define GUC_CTL_WA			1
+#define   GUC_WA_GAM_CREDITS		BIT(10)
+#define   GUC_WA_DUAL_QUEUE		BIT(11)
+#define   GUC_WA_RCS_RESET_BEFORE_RC6	BIT(13)
+#define   GUC_WA_CONTEXT_ISOLATION	BIT(15)
+#define   GUC_WA_PRE_PARSER		BIT(14)
+#define   GUC_WA_HOLD_CCS_SWITCHOUT	BIT(17)
+#define   GUC_WA_POLLCS			BIT(18)
+#define   GUC_WA_RENDER_RST_RC6_EXIT	BIT(19)
+#define   GUC_WA_RCS_REGS_IN_CCS_REGS_LIST	BIT(21)
+
+#define GUC_CTL_FEATURE			2
+#define   GUC_CTL_ENABLE_SLPC		BIT(2)
+#define   GUC_CTL_DISABLE_SCHEDULER	BIT(14)
+
+#define GUC_CTL_DEBUG			3
+#define   GUC_LOG_VERBOSITY_SHIFT	0
+#define   GUC_LOG_VERBOSITY_LOW		(0 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_MED		(1 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_HIGH	(2 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_ULTRA	(3 << GUC_LOG_VERBOSITY_SHIFT)
+#define	  GUC_LOG_VERBOSITY_MIN		0
+#define	  GUC_LOG_VERBOSITY_MAX		3
+#define	  GUC_LOG_VERBOSITY_MASK	0x0000000f
+#define	  GUC_LOG_DESTINATION_MASK	(3 << 4)
+#define   GUC_LOG_DISABLED		(1 << 6)
+#define   GUC_PROFILE_ENABLED		(1 << 7)
+
+#define GUC_CTL_ADS			4
+#define   GUC_ADS_ADDR_SHIFT		1
+#define   GUC_ADS_ADDR_MASK		(0xFFFFF << GUC_ADS_ADDR_SHIFT)
+
+#define GUC_CTL_DEVID			5
+
+#define GUC_CTL_MAX_DWORDS		14
+
+/* Scheduling policy settings */
+
+#define GLOBAL_POLICY_MAX_NUM_WI 15
+
+/* Don't reset an engine upon preemption failure */
+#define GLOBAL_POLICY_DISABLE_ENGINE_RESET				BIT(0)
+
+#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000
+
+struct guc_policies {
+	u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];
+	/*
+	 * In micro seconds. How much time to allow before DPC processing is
+	 * called back via interrupt (to prevent DPC queue drain starving).
+	 * Typically 1000s of micro seconds (example only, not granularity).
+	 */
+	u32 dpc_promote_time;
+
+	/* Must be set to take these new values. */
+	u32 is_valid;
+
+	/*
+	 * Max number of WIs to process per call. A large value may keep CS
+	 * idle.
+	 */
+	u32 max_num_work_items;
+
+	u32 global_flags;
+	u32 reserved[4];
+} __packed;
+
+/* GuC MMIO reg state struct */
+struct guc_mmio_reg {
+	u32 offset;
+	u32 value;
+	u32 flags;
+	u32 mask;
+#define GUC_REGSET_MASKED		BIT(0)
+#define GUC_REGSET_MASKED_WITH_VALUE	BIT(2)
+#define GUC_REGSET_RESTORE_ONLY		BIT(3)
+} __packed;
+
+/* GuC register sets */
+struct guc_mmio_reg_set {
+	u32 address;
+	u16 count;
+	u16 reserved;
+} __packed;
+
+/* Generic GT SysInfo data types */
+#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED		0
+#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK	1
+#define GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI	2
+#define GUC_GENERIC_GT_SYSINFO_MAX			16
+
+/* HW info */
+struct guc_gt_system_info {
+	u8 mapping_table[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+	u32 engine_enabled_masks[GUC_MAX_ENGINE_CLASSES];
+	u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX];
+} __packed;
+
+enum {
+	GUC_CAPTURE_LIST_INDEX_PF = 0,
+	GUC_CAPTURE_LIST_INDEX_VF = 1,
+	GUC_CAPTURE_LIST_INDEX_MAX = 2,
+};
+
+/* GuC Additional Data Struct */
+struct guc_ads {
+	struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+	u32 reserved0;
+	u32 scheduler_policies;
+	u32 gt_system_info;
+	u32 reserved1;
+	u32 control_data;
+	u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES];
+	u32 eng_state_size[GUC_MAX_ENGINE_CLASSES];
+	u32 private_data;
+	u32 um_init_data;
+	u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+	u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+	u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX];
+	u32 reserved[14];
+} __packed;
+
+/* Engine usage stats */
+struct guc_engine_usage_record {
+	u32 current_context_index;
+	u32 last_switch_in_stamp;
+	u32 reserved0;
+	u32 total_runtime;
+	u32 reserved1[4];
+} __packed;
+
+struct guc_engine_usage {
+	struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+} __packed;
+
+/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
+enum xe_guc_recv_message {
+	XE_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
+	XE_GUC_RECV_MSG_EXCEPTION = BIT(30),
+};
+
+/* Page fault structures */
+struct access_counter_desc {
+	u32 dw0;
+#define ACCESS_COUNTER_TYPE	BIT(0)
+#define ACCESS_COUNTER_SUBG_LO	GENMASK(31, 1)
+
+	u32 dw1;
+#define ACCESS_COUNTER_SUBG_HI	BIT(0)
+#define ACCESS_COUNTER_RSVD0	GENMASK(2, 1)
+#define ACCESS_COUNTER_ENG_INSTANCE	GENMASK(8, 3)
+#define ACCESS_COUNTER_ENG_CLASS	GENMASK(11, 9)
+#define ACCESS_COUNTER_ASID	GENMASK(31, 12)
+
+	u32 dw2;
+#define ACCESS_COUNTER_VFID	GENMASK(5, 0)
+#define ACCESS_COUNTER_RSVD1	GENMASK(7, 6)
+#define ACCESS_COUNTER_GRANULARITY	GENMASK(10, 8)
+#define ACCESS_COUNTER_RSVD2	GENMASK(16, 11)
+#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_LO	GENMASK(31, 17)
+
+	u32 dw3;
+#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_HI	GENMASK(31, 0)
+} __packed;
+
+enum guc_um_queue_type {
+	GUC_UM_HW_QUEUE_PAGE_FAULT = 0,
+	GUC_UM_HW_QUEUE_PAGE_FAULT_RESPONSE,
+	GUC_UM_HW_QUEUE_ACCESS_COUNTER,
+	GUC_UM_HW_QUEUE_MAX
+};
+
+struct guc_um_queue_params {
+	u64 base_dpa;
+	u32 base_ggtt_address;
+	u32 size_in_bytes;
+	u32 rsvd[4];
+} __packed;
+
+struct guc_um_init_params {
+	u64 page_response_timeout_in_us;
+	u32 rsvd[6];
+	struct guc_um_queue_params queue_params[GUC_UM_HW_QUEUE_MAX];
+} __packed;
+
+enum xe_guc_fault_reply_type {
+	PFR_ACCESS = 0,
+	PFR_ENGINE,
+	PFR_VFID,
+	PFR_ALL,
+	PFR_INVALID
+};
+
+enum xe_guc_response_desc_type {
+	TLB_INVALIDATION_DESC = 0,
+	FAULT_RESPONSE_DESC
+};
+
+struct xe_guc_pagefault_desc {
+	u32 dw0;
+#define PFD_FAULT_LEVEL		GENMASK(2, 0)
+#define PFD_SRC_ID		GENMASK(10, 3)
+#define PFD_RSVD_0		GENMASK(17, 11)
+#define XE2_PFD_TRVA_FAULT	BIT(18)
+#define PFD_ENG_INSTANCE	GENMASK(24, 19)
+#define PFD_ENG_CLASS		GENMASK(27, 25)
+#define PFD_PDATA_LO		GENMASK(31, 28)
+
+	u32 dw1;
+#define PFD_PDATA_HI		GENMASK(11, 0)
+#define PFD_PDATA_HI_SHIFT	4
+#define PFD_ASID		GENMASK(31, 12)
+
+	u32 dw2;
+#define PFD_ACCESS_TYPE		GENMASK(1, 0)
+#define PFD_FAULT_TYPE		GENMASK(3, 2)
+#define PFD_VFID		GENMASK(9, 4)
+#define PFD_RSVD_1		GENMASK(11, 10)
+#define PFD_VIRTUAL_ADDR_LO	GENMASK(31, 12)
+#define PFD_VIRTUAL_ADDR_LO_SHIFT 12
+
+	u32 dw3;
+#define PFD_VIRTUAL_ADDR_HI	GENMASK(31, 0)
+#define PFD_VIRTUAL_ADDR_HI_SHIFT 32
+} __packed;
+
+struct xe_guc_pagefault_reply {
+	u32 dw0;
+#define PFR_VALID		BIT(0)
+#define PFR_SUCCESS		BIT(1)
+#define PFR_REPLY		GENMASK(4, 2)
+#define PFR_RSVD_0		GENMASK(9, 5)
+#define PFR_DESC_TYPE		GENMASK(11, 10)
+#define PFR_ASID		GENMASK(31, 12)
+
+	u32 dw1;
+#define PFR_VFID		GENMASK(5, 0)
+#define PFR_RSVD_1		BIT(6)
+#define PFR_ENG_INSTANCE	GENMASK(12, 7)
+#define PFR_ENG_CLASS		GENMASK(15, 13)
+#define PFR_PDATA		GENMASK(31, 16)
+
+	u32 dw2;
+#define PFR_RSVD_2		GENMASK(31, 0)
+} __packed;
+
+struct xe_guc_acc_desc {
+	u32 dw0;
+#define ACC_TYPE	BIT(0)
+#define ACC_TRIGGER	0
+#define ACC_NOTIFY	1
+#define ACC_SUBG_LO	GENMASK(31, 1)
+
+	u32 dw1;
+#define ACC_SUBG_HI	BIT(0)
+#define ACC_RSVD0	GENMASK(2, 1)
+#define ACC_ENG_INSTANCE	GENMASK(8, 3)
+#define ACC_ENG_CLASS	GENMASK(11, 9)
+#define ACC_ASID	GENMASK(31, 12)
+
+	u32 dw2;
+#define ACC_VFID	GENMASK(5, 0)
+#define ACC_RSVD1	GENMASK(7, 6)
+#define ACC_GRANULARITY	GENMASK(10, 8)
+#define ACC_RSVD2	GENMASK(16, 11)
+#define ACC_VIRTUAL_ADDR_RANGE_LO	GENMASK(31, 17)
+
+	u32 dw3;
+#define ACC_VIRTUAL_ADDR_RANGE_HI	GENMASK(31, 0)
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
new file mode 100644
index 000000000000..2a13a00917f8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_hwconfig.h"
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_map.h"
+
+static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_GET_HWCONFIG,
+		lower_32_bits(ggtt_addr),
+		upper_32_bits(ggtt_addr),
+		size,
+	};
+
+	return xe_guc_mmio_send(guc, action, ARRAY_SIZE(action));
+}
+
+static int guc_hwconfig_size(struct xe_guc *guc, u32 *size)
+{
+	int ret = send_get_hwconfig(guc, 0, 0);
+
+	if (ret < 0)
+		return ret;
+
+	*size = ret;
+	return 0;
+}
+
+static int guc_hwconfig_copy(struct xe_guc *guc)
+{
+	int ret = send_get_hwconfig(guc, xe_bo_ggtt_addr(guc->hwconfig.bo),
+				    guc->hwconfig.size);
+
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int xe_guc_hwconfig_init(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_bo *bo;
+	u32 size;
+	int err;
+
+	/* Initialization already done */
+	if (guc->hwconfig.bo)
+		return 0;
+
+	/*
+	 * All hwconfig the same across GTs so only GT0 needs to be configured
+	 */
+	if (gt->info.id != XE_GT0)
+		return 0;
+
+	/* ADL_P, DG2+ supports hwconfig table */
+	if (GRAPHICS_VERx100(xe) < 1255 && xe->info.platform != XE_ALDERLAKE_P)
+		return 0;
+
+	err = guc_hwconfig_size(guc, &size);
+	if (err)
+		return err;
+	if (!size)
+		return -EINVAL;
+
+	bo = xe_managed_bo_create_pin_map(xe, tile, PAGE_ALIGN(size),
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+	guc->hwconfig.bo = bo;
+	guc->hwconfig.size = size;
+
+	return guc_hwconfig_copy(guc);
+}
+
+u32 xe_guc_hwconfig_size(struct xe_guc *guc)
+{
+	return !guc->hwconfig.bo ? 0 : guc->hwconfig.size;
+}
+
+void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+
+	XE_WARN_ON(!guc->hwconfig.bo);
+
+	xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0,
+			   guc->hwconfig.size);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h
new file mode 100644
index 000000000000..b5794d641900
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_HWCONFIG_H_
+#define _XE_GUC_HWCONFIG_H_
+
+#include <linux/types.h>
+
+struct xe_guc;
+
+int xe_guc_hwconfig_init(struct xe_guc *guc);
+u32 xe_guc_hwconfig_size(struct xe_guc *guc);
+void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
new file mode 100644
index 000000000000..bcd2f4d34081
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_log.h"
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_module.h"
+
+static struct xe_gt *
+log_to_gt(struct xe_guc_log *log)
+{
+	return container_of(log, struct xe_gt, uc.guc.log);
+}
+
+static struct xe_device *
+log_to_xe(struct xe_guc_log *log)
+{
+	return gt_to_xe(log_to_gt(log));
+}
+
+static size_t guc_log_size(void)
+{
+	/*
+	 *  GuC Log buffer Layout
+	 *
+	 *  +===============================+ 00B
+	 *  |    Crash dump state header    |
+	 *  +-------------------------------+ 32B
+	 *  |      Debug state header       |
+	 *  +-------------------------------+ 64B
+	 *  |     Capture state header      |
+	 *  +-------------------------------+ 96B
+	 *  |                               |
+	 *  +===============================+ PAGE_SIZE (4KB)
+	 *  |        Crash Dump logs        |
+	 *  +===============================+ + CRASH_SIZE
+	 *  |          Debug logs           |
+	 *  +===============================+ + DEBUG_SIZE
+	 *  |         Capture logs          |
+	 *  +===============================+ + CAPTURE_SIZE
+	 */
+	return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
+		CAPTURE_BUFFER_SIZE;
+}
+
+void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
+{
+	struct xe_device *xe = log_to_xe(log);
+	size_t size;
+	int i, j;
+
+	xe_assert(xe, log->bo);
+
+	size = log->bo->size;
+
+#define DW_PER_READ		128
+	xe_assert(xe, !(size % (DW_PER_READ * sizeof(u32))));
+	for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) {
+		u32 read[DW_PER_READ];
+
+		xe_map_memcpy_from(xe, read, &log->bo->vmap, i * sizeof(u32),
+				   DW_PER_READ * sizeof(u32));
+#define DW_PER_PRINT		4
+		for (j = 0; j < DW_PER_READ / DW_PER_PRINT; ++j) {
+			u32 *print = read + j * DW_PER_PRINT;
+
+			drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+				   *(print + 0), *(print + 1),
+				   *(print + 2), *(print + 3));
+		}
+	}
+}
+
+int xe_guc_log_init(struct xe_guc_log *log)
+{
+	struct xe_device *xe = log_to_xe(log);
+	struct xe_tile *tile = gt_to_tile(log_to_gt(log));
+	struct xe_bo *bo;
+
+	bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(),
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size());
+	log->bo = bo;
+	log->level = xe_modparam.guc_log_level;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
new file mode 100644
index 000000000000..2d25ab28b4b3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_LOG_H_
+#define _XE_GUC_LOG_H_
+
+#include "xe_guc_log_types.h"
+
+struct drm_printer;
+
+#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
+#define CRASH_BUFFER_SIZE       SZ_1M
+#define DEBUG_BUFFER_SIZE       SZ_8M
+#define CAPTURE_BUFFER_SIZE     SZ_2M
+#else
+#define CRASH_BUFFER_SIZE	SZ_8K
+#define DEBUG_BUFFER_SIZE	SZ_64K
+#define CAPTURE_BUFFER_SIZE	SZ_16K
+#endif
+/*
+ * While we're using plain log level in i915, GuC controls are much more...
+ * "elaborate"? We have a couple of bits for verbosity, separate bit for actual
+ * log enabling, and separate bit for default logging - which "conveniently"
+ * ignores the enable bit.
+ */
+#define GUC_LOG_LEVEL_DISABLED		0
+#define GUC_LOG_LEVEL_NON_VERBOSE	1
+#define GUC_LOG_LEVEL_IS_ENABLED(x)	((x) > GUC_LOG_LEVEL_DISABLED)
+#define GUC_LOG_LEVEL_IS_VERBOSE(x)	((x) > GUC_LOG_LEVEL_NON_VERBOSE)
+#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({		\
+	typeof(x) _x = (x);				\
+	GUC_LOG_LEVEL_IS_VERBOSE(_x) ? _x - 2 : 0;	\
+})
+#define GUC_VERBOSITY_TO_LOG_LEVEL(x)	((x) + 2)
+#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX)
+
+int xe_guc_log_init(struct xe_guc_log *log);
+void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p);
+
+static inline u32
+xe_guc_log_get_level(struct xe_guc_log *log)
+{
+	return log->level;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
new file mode 100644
index 000000000000..125080d138a7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_LOG_TYPES_H_
+#define _XE_GUC_LOG_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/**
+ * struct xe_guc_log - GuC log
+ */
+struct xe_guc_log {
+	/** @level: GuC log level */
+	u32 level;
+	/** @bo: XE BO for GuC log */
+	struct xe_bo *bo;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
new file mode 100644
index 000000000000..f71085228cb3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -0,0 +1,1000 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_pc.h"
+
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_actions_slpc_abi.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_idle.h"
+#include "xe_gt_sysfs.h"
+#include "xe_gt_types.h"
+#include "xe_guc_ct.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_pcode.h"
+
+#define MCHBAR_MIRROR_BASE_SNB	0x140000
+
+#define RP_STATE_CAP		XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5998)
+#define   RP0_MASK		REG_GENMASK(7, 0)
+#define   RP1_MASK		REG_GENMASK(15, 8)
+#define   RPN_MASK		REG_GENMASK(23, 16)
+
+#define FREQ_INFO_REC	XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
+#define   RPE_MASK		REG_GENMASK(15, 8)
+
+#define GT_PERF_STATUS		XE_REG(0x1381b4)
+#define   CAGF_MASK	REG_GENMASK(19, 11)
+
+#define GT_FREQUENCY_MULTIPLIER	50
+#define GT_FREQUENCY_SCALER	3
+
+/**
+ * DOC: GuC Power Conservation (PC)
+ *
+ * GuC Power Conservation (PC) supports multiple features for the most
+ * efficient and performing use of the GT when GuC submission is enabled,
+ * including frequency management, Render-C states management, and various
+ * algorithms for power balancing.
+ *
+ * Single Loop Power Conservation (SLPC) is the name given to the suite of
+ * connected power conservation features in the GuC firmware. The firmware
+ * exposes a programming interface to the host for the control of SLPC.
+ *
+ * Frequency management:
+ * =====================
+ *
+ * Xe driver enables SLPC with all of its defaults features and frequency
+ * selection, which varies per platform.
+ *
+ * Render-C States:
+ * ================
+ *
+ * Render-C states is also a GuC PC feature that is now enabled in Xe for
+ * all platforms.
+ *
+ */
+
+static struct xe_guc *
+pc_to_guc(struct xe_guc_pc *pc)
+{
+	return container_of(pc, struct xe_guc, pc);
+}
+
+static struct xe_device *
+pc_to_xe(struct xe_guc_pc *pc)
+{
+	struct xe_guc *guc = pc_to_guc(pc);
+	struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc);
+
+	return gt_to_xe(gt);
+}
+
+static struct xe_gt *
+pc_to_gt(struct xe_guc_pc *pc)
+{
+	return container_of(pc, struct xe_gt, uc.guc.pc);
+}
+
+static struct iosys_map *
+pc_to_maps(struct xe_guc_pc *pc)
+{
+	return &pc->bo->vmap;
+}
+
+#define slpc_shared_data_read(pc_, field_) \
+	xe_map_rd_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \
+			struct slpc_shared_data, field_)
+
+#define slpc_shared_data_write(pc_, field_, val_) \
+	xe_map_wr_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \
+			struct slpc_shared_data, field_, val_)
+
+#define SLPC_EVENT(id, count) \
+	(FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \
+	 FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count))
+
+static int wait_for_pc_state(struct xe_guc_pc *pc,
+			     enum slpc_global_state state)
+{
+	int timeout_us = 5000; /* rought 5ms, but no need for precision */
+	int slept, wait = 10;
+
+	xe_device_assert_mem_access(pc_to_xe(pc));
+
+	for (slept = 0; slept < timeout_us;) {
+		if (slpc_shared_data_read(pc, header.global_state) == state)
+			return 0;
+
+		usleep_range(wait, wait << 1);
+		slept += wait;
+		wait <<= 1;
+		if (slept + wait > timeout_us)
+			wait = timeout_us - slept;
+	}
+
+	return -ETIMEDOUT;
+}
+
+static int pc_action_reset(struct xe_guc_pc *pc)
+{
+	struct  xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_RESET, 2),
+		xe_bo_ggtt_addr(pc->bo),
+		0,
+	};
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC reset: %pe", ERR_PTR(ret));
+
+	return ret;
+}
+
+static int pc_action_shutdown(struct xe_guc_pc *pc)
+{
+	struct  xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_SHUTDOWN, 2),
+		xe_bo_ggtt_addr(pc->bo),
+		0,
+	};
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC shutdown %pe",
+			ERR_PTR(ret));
+
+	return ret;
+}
+
+static int pc_action_query_task_state(struct xe_guc_pc *pc)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2),
+		xe_bo_ggtt_addr(pc->bo),
+		0,
+	};
+
+	if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+		return -EAGAIN;
+
+	/* Blocking here to ensure the results are ready before reading them */
+	ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action));
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm,
+			"GuC PC query task state failed: %pe", ERR_PTR(ret));
+
+	return ret;
+}
+
+static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	int ret;
+	u32 action[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+		id,
+		value,
+	};
+
+	if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+		return -EAGAIN;
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC set param failed: %pe",
+			ERR_PTR(ret));
+
+	return ret;
+}
+
+static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode)
+{
+	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+	u32 action[] = {
+		XE_GUC_ACTION_SETUP_PC_GUCRC,
+		mode,
+	};
+	int ret;
+
+	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+	if (ret)
+		drm_err(&pc_to_xe(pc)->drm, "GuC RC enable failed: %pe",
+			ERR_PTR(ret));
+	return ret;
+}
+
+static u32 decode_freq(u32 raw)
+{
+	return DIV_ROUND_CLOSEST(raw * GT_FREQUENCY_MULTIPLIER,
+				 GT_FREQUENCY_SCALER);
+}
+
+static u32 encode_freq(u32 freq)
+{
+	return DIV_ROUND_CLOSEST(freq * GT_FREQUENCY_SCALER,
+				 GT_FREQUENCY_MULTIPLIER);
+}
+
+static u32 pc_get_min_freq(struct xe_guc_pc *pc)
+{
+	u32 freq;
+
+	freq = FIELD_GET(SLPC_MIN_UNSLICE_FREQ_MASK,
+			 slpc_shared_data_read(pc, task_state_data.freq));
+
+	return decode_freq(freq);
+}
+
+static void pc_set_manual_rp_ctrl(struct xe_guc_pc *pc, bool enable)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 state = enable ? RPSWCTL_ENABLE : RPSWCTL_DISABLE;
+
+	/* Allow/Disallow punit to process software freq requests */
+	xe_mmio_write32(gt, RP_CONTROL, state);
+}
+
+static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 rpnswreq;
+
+	pc_set_manual_rp_ctrl(pc, true);
+
+	/* Req freq is in units of 16.66 Mhz */
+	rpnswreq = REG_FIELD_PREP(REQ_RATIO_MASK, encode_freq(freq));
+	xe_mmio_write32(gt, RPNSWREQ, rpnswreq);
+
+	/* Sleep for a small time to allow pcode to respond */
+	usleep_range(100, 300);
+
+	pc_set_manual_rp_ctrl(pc, false);
+}
+
+static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	/*
+	 * Let's only check for the rpn-rp0 range. If max < min,
+	 * min becomes a fixed request.
+	 */
+	if (freq < pc->rpn_freq || freq > pc->rp0_freq)
+		return -EINVAL;
+
+	/*
+	 * GuC policy is to elevate minimum frequency to the efficient levels
+	 * Our goal is to have the admin choices respected.
+	 */
+	pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
+			    freq < pc->rpe_freq);
+
+	return pc_action_set_param(pc,
+				   SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+				   freq);
+}
+
+static int pc_get_max_freq(struct xe_guc_pc *pc)
+{
+	u32 freq;
+
+	freq = FIELD_GET(SLPC_MAX_UNSLICE_FREQ_MASK,
+			 slpc_shared_data_read(pc, task_state_data.freq));
+
+	return decode_freq(freq);
+}
+
+static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	/*
+	 * Let's only check for the rpn-rp0 range. If max < min,
+	 * min becomes a fixed request.
+	 * Also, overclocking is not supported.
+	 */
+	if (freq < pc->rpn_freq || freq > pc->rp0_freq)
+		return -EINVAL;
+
+	return pc_action_set_param(pc,
+				   SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ,
+				   freq);
+}
+
+static void mtl_update_rpe_value(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 reg;
+
+	if (xe_gt_is_media_type(gt))
+		reg = xe_mmio_read32(gt, MTL_MPE_FREQUENCY);
+	else
+		reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY);
+
+	pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg));
+}
+
+static void tgl_update_rpe_value(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 reg;
+
+	/*
+	 * For PVC we still need to use fused RP1 as the approximation for RPe
+	 * For other platforms than PVC we get the resolved RPe directly from
+	 * PCODE at a different register
+	 */
+	if (xe->info.platform == XE_PVC)
+		reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
+	else
+		reg = xe_mmio_read32(gt, FREQ_INFO_REC);
+
+	pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+}
+
+static void pc_update_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (GRAPHICS_VERx100(xe) >= 1270)
+		mtl_update_rpe_value(pc);
+	else
+		tgl_update_rpe_value(pc);
+
+	/*
+	 * RPe is decided at runtime by PCODE. In the rare case where that's
+	 * smaller than the fused min, we will trust the PCODE and use that
+	 * as our minimum one.
+	 */
+	pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq);
+}
+
+/**
+ * xe_guc_pc_get_act_freq - Get Actual running frequency
+ * @pc: The GuC PC
+ *
+ * Returns: The Actual running frequency. Which might be 0 if GT is in Render-C sleep state (RC6).
+ */
+u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 freq;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+
+	/* When in RC6, actual frequency reported will be 0. */
+	if (GRAPHICS_VERx100(xe) >= 1270) {
+		freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
+		freq = REG_FIELD_GET(MTL_CAGF_MASK, freq);
+	} else {
+		freq = xe_mmio_read32(gt, GT_PERF_STATUS);
+		freq = REG_FIELD_GET(CAGF_MASK, freq);
+	}
+
+	freq = decode_freq(freq);
+
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return freq;
+}
+
+/**
+ * xe_guc_pc_get_cur_freq - Get Current requested frequency
+ * @pc: The GuC PC
+ * @freq: A pointer to a u32 where the freq value will be returned
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset).
+ */
+int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	/*
+	 * GuC SLPC plays with cur freq request when GuCRC is enabled
+	 * Block RC6 for a more reliable read.
+	 */
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		goto out;
+
+	*freq = xe_mmio_read32(gt, RPNSWREQ);
+
+	*freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq);
+	*freq = decode_freq(*freq);
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+out:
+	xe_device_mem_access_put(gt_to_xe(gt));
+	return ret;
+}
+
+/**
+ * xe_guc_pc_get_rp0_freq - Get the RP0 freq
+ * @pc: The GuC PC
+ *
+ * Returns: RP0 freq.
+ */
+u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc)
+{
+	return pc->rp0_freq;
+}
+
+/**
+ * xe_guc_pc_get_rpe_freq - Get the RPe freq
+ * @pc: The GuC PC
+ *
+ * Returns: RPe freq.
+ */
+u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+
+	xe_device_mem_access_get(xe);
+	pc_update_rp_values(pc);
+	xe_device_mem_access_put(xe);
+
+	return pc->rpe_freq;
+}
+
+/**
+ * xe_guc_pc_get_rpn_freq - Get the RPn freq
+ * @pc: The GuC PC
+ *
+ * Returns: RPn freq.
+ */
+u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
+{
+	return pc->rpn_freq;
+}
+
+/**
+ * xe_guc_pc_get_min_freq - Get the min operational frequency
+ * @pc: The GuC PC
+ * @freq: A pointer to a u32 where the freq value will be returned
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset).
+ */
+int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	int ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	mutex_lock(&pc->freq_lock);
+	if (!pc->freq_ready) {
+		/* Might be in the middle of a gt reset */
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	/*
+	 * GuC SLPC plays with min freq request when GuCRC is enabled
+	 * Block RC6 for a more reliable read.
+	 */
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		goto out;
+
+	ret = pc_action_query_task_state(pc);
+	if (ret)
+		goto fw;
+
+	*freq = pc_get_min_freq(pc);
+
+fw:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+out:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+/**
+ * xe_guc_pc_set_min_freq - Set the minimal operational frequency
+ * @pc: The GuC PC
+ * @freq: The selected minimal frequency
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset),
+ *         -EINVAL if value out of bounds.
+ */
+int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	int ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	mutex_lock(&pc->freq_lock);
+	if (!pc->freq_ready) {
+		/* Might be in the middle of a gt reset */
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	ret = pc_set_min_freq(pc, freq);
+	if (ret)
+		goto out;
+
+	pc->user_requested_min = freq;
+
+out:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(pc_to_xe(pc));
+
+	return ret;
+}
+
+/**
+ * xe_guc_pc_get_max_freq - Get Maximum operational frequency
+ * @pc: The GuC PC
+ * @freq: A pointer to a u32 where the freq value will be returned
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset).
+ */
+int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
+{
+	int ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	mutex_lock(&pc->freq_lock);
+	if (!pc->freq_ready) {
+		/* Might be in the middle of a gt reset */
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	ret = pc_action_query_task_state(pc);
+	if (ret)
+		goto out;
+
+	*freq = pc_get_max_freq(pc);
+
+out:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+/**
+ * xe_guc_pc_set_max_freq - Set the maximum operational frequency
+ * @pc: The GuC PC
+ * @freq: The selected maximum frequency value
+ *
+ * Returns: 0 on success,
+ *         -EAGAIN if GuC PC not ready (likely in middle of a reset),
+ *         -EINVAL if value out of bounds.
+ */
+int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
+{
+	int ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+	mutex_lock(&pc->freq_lock);
+	if (!pc->freq_ready) {
+		/* Might be in the middle of a gt reset */
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	ret = pc_set_max_freq(pc, freq);
+	if (ret)
+		goto out;
+
+	pc->user_requested_max = freq;
+
+out:
+	mutex_unlock(&pc->freq_lock);
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+/**
+ * xe_guc_pc_c_status - get the current GT C state
+ * @pc: XE_GuC_PC instance
+ */
+enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 reg, gt_c_state;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+		reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
+		gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg);
+	} else {
+		reg = xe_mmio_read32(gt, GT_CORE_STATUS);
+		gt_c_state = REG_FIELD_GET(RCN_MASK, reg);
+	}
+
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	switch (gt_c_state) {
+	case GT_C6:
+		return GT_IDLE_C6;
+	case GT_C0:
+		return GT_IDLE_C0;
+	default:
+		return GT_IDLE_UNKNOWN;
+	}
+}
+
+/**
+ * xe_guc_pc_rc6_residency - rc6 residency counter
+ * @pc: Xe_GuC_PC instance
+ */
+u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 reg;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	reg = xe_mmio_read32(gt, GT_GFX_RC6);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return reg;
+}
+
+/**
+ * xe_guc_pc_mc6_residency - mc6 residency counter
+ * @pc: Xe_GuC_PC instance
+ */
+u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u64 reg;
+
+	xe_device_mem_access_get(gt_to_xe(gt));
+	reg = xe_mmio_read32(gt, MTL_MEDIA_MC6);
+	xe_device_mem_access_put(gt_to_xe(gt));
+
+	return reg;
+}
+
+static void mtl_init_fused_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 reg;
+
+	xe_device_assert_mem_access(pc_to_xe(pc));
+
+	if (xe_gt_is_media_type(gt))
+		reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP);
+	else
+		reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP);
+
+	pc->rp0_freq = decode_freq(REG_FIELD_GET(MTL_RP0_CAP_MASK, reg));
+
+	pc->rpn_freq = decode_freq(REG_FIELD_GET(MTL_RPN_CAP_MASK, reg));
+}
+
+static void tgl_init_fused_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 reg;
+
+	xe_device_assert_mem_access(pc_to_xe(pc));
+
+	if (xe->info.platform == XE_PVC)
+		reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP);
+	else
+		reg = xe_mmio_read32(gt, RP_STATE_CAP);
+	pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+	pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+}
+
+static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (GRAPHICS_VERx100(xe) >= 1270)
+		mtl_init_fused_rp_values(pc);
+	else
+		tgl_init_fused_rp_values(pc);
+}
+
+/**
+ * xe_guc_pc_init_early - Initialize RPx values and request a higher GT
+ * frequency to allow faster GuC load times
+ * @pc: Xe_GuC_PC instance
+ */
+void xe_guc_pc_init_early(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+	pc_init_fused_rp_values(pc);
+	pc_set_cur_freq(pc, pc->rp0_freq);
+}
+
+static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
+{
+	int ret;
+
+	lockdep_assert_held(&pc->freq_lock);
+
+	ret = pc_action_query_task_state(pc);
+	if (ret)
+		return ret;
+
+	/*
+	 * GuC defaults to some RPmax that is not actually achievable without
+	 * overclocking. Let's adjust it to the Hardware RP0, which is the
+	 * regular maximum
+	 */
+	if (pc_get_max_freq(pc) > pc->rp0_freq)
+		pc_set_max_freq(pc, pc->rp0_freq);
+
+	/*
+	 * Same thing happens for Server platforms where min is listed as
+	 * RPMax
+	 */
+	if (pc_get_min_freq(pc) > pc->rp0_freq)
+		pc_set_min_freq(pc, pc->rp0_freq);
+
+	return 0;
+}
+
+static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
+{
+	int ret = 0;
+
+	lockdep_assert_held(&pc->freq_lock);
+
+	if (pc->user_requested_min != 0) {
+		ret = pc_set_min_freq(pc, pc->user_requested_min);
+		if (ret)
+			return ret;
+	}
+
+	if (pc->user_requested_max != 0) {
+		ret = pc_set_max_freq(pc, pc->user_requested_max);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+/**
+ * xe_guc_pc_gucrc_disable - Disable GuC RC
+ * @pc: Xe_GuC_PC instance
+ *
+ * Disables GuC RC by taking control of RC6 back from GuC.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
+{
+	struct xe_device *xe = pc_to_xe(pc);
+	struct xe_gt *gt = pc_to_gt(pc);
+	int ret = 0;
+
+	if (xe->info.skip_guc_pc)
+		return 0;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+
+	ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL);
+	if (ret)
+		goto out;
+
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		goto out;
+
+	xe_gt_idle_disable_c6(gt);
+
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+out:
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+static void pc_init_pcode_freq(struct xe_guc_pc *pc)
+{
+	u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER);
+	u32 max = DIV_ROUND_CLOSEST(pc->rp0_freq, GT_FREQUENCY_MULTIPLIER);
+
+	XE_WARN_ON(xe_pcode_init_min_freq_table(pc_to_gt(pc), min, max));
+}
+
+static int pc_init_freqs(struct xe_guc_pc *pc)
+{
+	int ret;
+
+	mutex_lock(&pc->freq_lock);
+
+	ret = pc_adjust_freq_bounds(pc);
+	if (ret)
+		goto out;
+
+	ret = pc_adjust_requested_freq(pc);
+	if (ret)
+		goto out;
+
+	pc_update_rp_values(pc);
+
+	pc_init_pcode_freq(pc);
+
+	/*
+	 * The frequencies are really ready for use only after the user
+	 * requested ones got restored.
+	 */
+	pc->freq_ready = true;
+
+out:
+	mutex_unlock(&pc->freq_lock);
+	return ret;
+}
+
+/**
+ * xe_guc_pc_start - Start GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+int xe_guc_pc_start(struct xe_guc_pc *pc)
+{
+	struct xe_device *xe = pc_to_xe(pc);
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+	int ret;
+
+	xe_gt_assert(gt, xe_device_uc_enabled(xe));
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+
+	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (ret)
+		goto out_fail_force_wake;
+
+	if (xe->info.skip_guc_pc) {
+		if (xe->info.platform != XE_PVC)
+			xe_gt_idle_enable_c6(gt);
+
+		/* Request max possible since dynamic freq mgmt is not enabled */
+		pc_set_cur_freq(pc, UINT_MAX);
+
+		ret = 0;
+		goto out;
+	}
+
+	memset(pc->bo->vmap.vaddr, 0, size);
+	slpc_shared_data_write(pc, header.size, size);
+
+	ret = pc_action_reset(pc);
+	if (ret)
+		goto out;
+
+	if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) {
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC Start failed\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = pc_init_freqs(pc);
+	if (ret)
+		goto out;
+
+	if (xe->info.platform == XE_PVC) {
+		xe_guc_pc_gucrc_disable(pc);
+		ret = 0;
+		goto out;
+	}
+
+	ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL);
+
+out:
+	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+out_fail_force_wake:
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+/**
+ * xe_guc_pc_stop - Stop GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+int xe_guc_pc_stop(struct xe_guc_pc *pc)
+{
+	struct xe_device *xe = pc_to_xe(pc);
+	int ret;
+
+	xe_device_mem_access_get(pc_to_xe(pc));
+
+	if (xe->info.skip_guc_pc) {
+		xe_gt_idle_disable_c6(pc_to_gt(pc));
+		ret = 0;
+		goto out;
+	}
+
+	mutex_lock(&pc->freq_lock);
+	pc->freq_ready = false;
+	mutex_unlock(&pc->freq_lock);
+
+	ret = pc_action_shutdown(pc);
+	if (ret)
+		goto out;
+
+	if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING)) {
+		drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n");
+		ret = -EIO;
+	}
+
+out:
+	xe_device_mem_access_put(pc_to_xe(pc));
+	return ret;
+}
+
+/**
+ * xe_guc_pc_fini - Finalize GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+void xe_guc_pc_fini(struct xe_guc_pc *pc)
+{
+	struct xe_device *xe = pc_to_xe(pc);
+
+	if (xe->info.skip_guc_pc) {
+		xe_gt_idle_disable_c6(pc_to_gt(pc));
+		return;
+	}
+
+	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
+	XE_WARN_ON(xe_guc_pc_stop(pc));
+	mutex_destroy(&pc->freq_lock);
+}
+
+/**
+ * xe_guc_pc_init - Initialize GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+int xe_guc_pc_init(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *bo;
+	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+
+	if (xe->info.skip_guc_pc)
+		return 0;
+
+	mutex_init(&pc->freq_lock);
+
+	bo = xe_managed_bo_create_pin_map(xe, tile, size,
+					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	pc->bo = bo;
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
new file mode 100644
index 000000000000..cecad8e9300b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_PC_H_
+#define _XE_GUC_PC_H_
+
+#include "xe_guc_pc_types.h"
+
+int xe_guc_pc_init(struct xe_guc_pc *pc);
+void xe_guc_pc_fini(struct xe_guc_pc *pc);
+int xe_guc_pc_start(struct xe_guc_pc *pc);
+int xe_guc_pc_stop(struct xe_guc_pc *pc);
+int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc);
+
+u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc);
+int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq);
+u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc);
+u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc);
+u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc);
+int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq);
+int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq);
+int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq);
+int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq);
+
+enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
+u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
+u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
+void xe_guc_pc_init_early(struct xe_guc_pc *pc);
+#endif /* _XE_GUC_PC_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
new file mode 100644
index 000000000000..2afd0dbc3542
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_PC_TYPES_H_
+#define _XE_GUC_PC_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/**
+ * struct xe_guc_pc - GuC Power Conservation (PC)
+ */
+struct xe_guc_pc {
+	/** @bo: GGTT buffer object that is shared with GuC PC */
+	struct xe_bo *bo;
+	/** @rp0_freq: HW RP0 frequency - The Maximum one */
+	u32 rp0_freq;
+	/** @rpe_freq: HW RPe frequency - The Efficient one */
+	u32 rpe_freq;
+	/** @rpn_freq: HW RPN frequency - The Minimum one */
+	u32 rpn_freq;
+	/** @user_requested_min: Stash the minimum requested freq by user */
+	u32 user_requested_min;
+	/** @user_requested_max: Stash the maximum requested freq by user */
+	u32 user_requested_max;
+	/** @freq_lock: Let's protect the frequencies */
+	struct mutex freq_lock;
+	/** @freq_ready: Only handle freq changes, if they are really ready */
+	bool freq_ready;
+};
+
+#endif	/* _XE_GUC_PC_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
new file mode 100644
index 000000000000..21ac68e3246f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -0,0 +1,1990 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_guc_submit.h"
+
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/dma-fence-array.h>
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_klvs_abi.h"
+#include "regs/xe_lrc_layout.h"
+#include "xe_assert.h"
+#include "xe_devcoredump.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
+#include "xe_gpu_scheduler.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_exec_queue_types.h"
+#include "xe_guc_submit_types.h"
+#include "xe_hw_engine.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_mocs.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+static struct xe_guc *
+exec_queue_to_guc(struct xe_exec_queue *q)
+{
+	return &q->gt->uc.guc;
+}
+
+/*
+ * Helpers for engine state, using an atomic as some of the bits can transition
+ * as the same time (e.g. a suspend can be happning at the same time as schedule
+ * engine done being processed).
+ */
+#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
+#define ENGINE_STATE_ENABLED		(1 << 1)
+#define EXEC_QUEUE_STATE_PENDING_ENABLE	(1 << 2)
+#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
+#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
+#define ENGINE_STATE_SUSPENDED		(1 << 5)
+#define EXEC_QUEUE_STATE_RESET		(1 << 6)
+#define ENGINE_STATE_KILLED		(1 << 7)
+
+static bool exec_queue_registered(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
+}
+
+static void set_exec_queue_registered(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
+}
+
+static void clear_exec_queue_registered(struct xe_exec_queue *q)
+{
+	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
+}
+
+static bool exec_queue_enabled(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED;
+}
+
+static void set_exec_queue_enabled(struct xe_exec_queue *q)
+{
+	atomic_or(ENGINE_STATE_ENABLED, &q->guc->state);
+}
+
+static void clear_exec_queue_enabled(struct xe_exec_queue *q)
+{
+	atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state);
+}
+
+static bool exec_queue_pending_enable(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
+}
+
+static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
+}
+
+static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
+{
+	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
+}
+
+static bool exec_queue_pending_disable(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
+}
+
+static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
+}
+
+static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
+{
+	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
+}
+
+static bool exec_queue_destroyed(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
+}
+
+static void set_exec_queue_destroyed(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
+}
+
+static bool exec_queue_banned(struct xe_exec_queue *q)
+{
+	return (q->flags & EXEC_QUEUE_FLAG_BANNED);
+}
+
+static void set_exec_queue_banned(struct xe_exec_queue *q)
+{
+	q->flags |= EXEC_QUEUE_FLAG_BANNED;
+}
+
+static bool exec_queue_suspended(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED;
+}
+
+static void set_exec_queue_suspended(struct xe_exec_queue *q)
+{
+	atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state);
+}
+
+static void clear_exec_queue_suspended(struct xe_exec_queue *q)
+{
+	atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state);
+}
+
+static bool exec_queue_reset(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
+}
+
+static void set_exec_queue_reset(struct xe_exec_queue *q)
+{
+	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
+}
+
+static bool exec_queue_killed(struct xe_exec_queue *q)
+{
+	return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED;
+}
+
+static void set_exec_queue_killed(struct xe_exec_queue *q)
+{
+	atomic_or(ENGINE_STATE_KILLED, &q->guc->state);
+}
+
+static bool exec_queue_killed_or_banned(struct xe_exec_queue *q)
+{
+	return exec_queue_killed(q) || exec_queue_banned(q);
+}
+
+#ifdef CONFIG_PROVE_LOCKING
+static int alloc_submit_wq(struct xe_guc *guc)
+{
+	int i;
+
+	for (i = 0; i < NUM_SUBMIT_WQ; ++i) {
+		guc->submission_state.submit_wq_pool[i] =
+			alloc_ordered_workqueue("submit_wq", 0);
+		if (!guc->submission_state.submit_wq_pool[i])
+			goto err_free;
+	}
+
+	return 0;
+
+err_free:
+	while (i)
+		destroy_workqueue(guc->submission_state.submit_wq_pool[--i]);
+
+	return -ENOMEM;
+}
+
+static void free_submit_wq(struct xe_guc *guc)
+{
+	int i;
+
+	for (i = 0; i < NUM_SUBMIT_WQ; ++i)
+		destroy_workqueue(guc->submission_state.submit_wq_pool[i]);
+}
+
+static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
+{
+	int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ;
+
+	return guc->submission_state.submit_wq_pool[idx];
+}
+#else
+static int alloc_submit_wq(struct xe_guc *guc)
+{
+	return 0;
+}
+
+static void free_submit_wq(struct xe_guc *guc)
+{
+
+}
+
+static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
+{
+	return NULL;
+}
+#endif
+
+static void guc_submit_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_guc *guc = arg;
+
+	xa_destroy(&guc->submission_state.exec_queue_lookup);
+	ida_destroy(&guc->submission_state.guc_ids);
+	bitmap_free(guc->submission_state.guc_ids_bitmap);
+	free_submit_wq(guc);
+	mutex_destroy(&guc->submission_state.lock);
+}
+
+#define GUC_ID_MAX		65535
+#define GUC_ID_NUMBER_MLRC	4096
+#define GUC_ID_NUMBER_SLRC	(GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
+#define GUC_ID_START_MLRC	GUC_ID_NUMBER_SLRC
+
+static const struct xe_exec_queue_ops guc_exec_queue_ops;
+
+static void primelockdep(struct xe_guc *guc)
+{
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	fs_reclaim_acquire(GFP_KERNEL);
+
+	mutex_lock(&guc->submission_state.lock);
+	might_lock(&guc->submission_state.suspend.lock);
+	mutex_unlock(&guc->submission_state.lock);
+
+	fs_reclaim_release(GFP_KERNEL);
+}
+
+int xe_guc_submit_init(struct xe_guc *guc)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
+	int err;
+
+	guc->submission_state.guc_ids_bitmap =
+		bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
+	if (!guc->submission_state.guc_ids_bitmap)
+		return -ENOMEM;
+
+	err = alloc_submit_wq(guc);
+	if (err) {
+		bitmap_free(guc->submission_state.guc_ids_bitmap);
+		return err;
+	}
+
+	gt->exec_queue_ops = &guc_exec_queue_ops;
+
+	mutex_init(&guc->submission_state.lock);
+	xa_init(&guc->submission_state.exec_queue_lookup);
+	ida_init(&guc->submission_state.guc_ids);
+
+	spin_lock_init(&guc->submission_state.suspend.lock);
+	guc->submission_state.suspend.context = dma_fence_context_alloc(1);
+
+	primelockdep(guc);
+
+	err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
+{
+	int i;
+
+	lockdep_assert_held(&guc->submission_state.lock);
+
+	for (i = 0; i < xa_count; ++i)
+		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);
+
+	if (xe_exec_queue_is_parallel(q))
+		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
+				      q->guc->id - GUC_ID_START_MLRC,
+				      order_base_2(q->width));
+	else
+		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
+}
+
+static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	int ret;
+	void *ptr;
+	int i;
+
+	/*
+	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
+	 * worse case user gets -ENOMEM on engine create and has to try again.
+	 *
+	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
+	 * failure.
+	 */
+	lockdep_assert_held(&guc->submission_state.lock);
+
+	if (xe_exec_queue_is_parallel(q)) {
+		void *bitmap = guc->submission_state.guc_ids_bitmap;
+
+		ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
+					      order_base_2(q->width));
+	} else {
+		ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
+				     GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
+	}
+	if (ret < 0)
+		return ret;
+
+	q->guc->id = ret;
+	if (xe_exec_queue_is_parallel(q))
+		q->guc->id += GUC_ID_START_MLRC;
+
+	for (i = 0; i < q->width; ++i) {
+		ptr = xa_store(&guc->submission_state.exec_queue_lookup,
+			       q->guc->id + i, q, GFP_NOWAIT);
+		if (IS_ERR(ptr)) {
+			ret = PTR_ERR(ptr);
+			goto err_release;
+		}
+	}
+
+	return 0;
+
+err_release:
+	__release_guc_id(guc, q, i);
+
+	return ret;
+}
+
+static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	mutex_lock(&guc->submission_state.lock);
+	__release_guc_id(guc, q, q->width);
+	mutex_unlock(&guc->submission_state.lock);
+}
+
+struct exec_queue_policy {
+	u32 count;
+	struct guc_update_exec_queue_policy h2g;
+};
+
+static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
+{
+	size_t bytes = sizeof(policy->h2g.header) +
+		       (sizeof(policy->h2g.klv[0]) * policy->count);
+
+	return bytes / sizeof(u32);
+}
+
+static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
+					      u16 guc_id)
+{
+	policy->h2g.header.action =
+		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
+	policy->h2g.header.guc_id = guc_id;
+	policy->count = 0;
+}
+
+#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
+static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
+					   u32 data) \
+{ \
+	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
+\
+	policy->h2g.klv[policy->count].kl = \
+		FIELD_PREP(GUC_KLV_0_KEY, \
+			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
+		FIELD_PREP(GUC_KLV_0_LEN, 1); \
+	policy->h2g.klv[policy->count].value = data; \
+	policy->count++; \
+}
+
+MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
+MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
+MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
+#undef MAKE_EXEC_QUEUE_POLICY_ADD
+
+static const int xe_exec_queue_prio_to_guc[] = {
+	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
+	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
+	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
+	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
+};
+
+static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	struct exec_queue_policy policy;
+	struct xe_device *xe = guc_to_xe(guc);
+	enum xe_exec_queue_priority prio = q->priority;
+	u32 timeslice_us = q->sched_props.timeslice_us;
+	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
+
+	xe_assert(xe, exec_queue_registered(q));
+
+	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
+	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
+	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
+	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
+
+	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
+		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
+}
+
+static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	struct exec_queue_policy policy;
+
+	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
+	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
+
+	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
+		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
+}
+
+#define parallel_read(xe_, map_, field_) \
+	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
+			field_)
+#define parallel_write(xe_, map_, field_, val_) \
+	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
+			field_, val_)
+
+static void __register_mlrc_engine(struct xe_guc *guc,
+				   struct xe_exec_queue *q,
+				   struct guc_ctxt_registration_info *info)
+{
+#define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
+	struct xe_device *xe = guc_to_xe(guc);
+	u32 action[MAX_MLRC_REG_SIZE];
+	int len = 0;
+	int i;
+
+	xe_assert(xe, xe_exec_queue_is_parallel(q));
+
+	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
+	action[len++] = info->flags;
+	action[len++] = info->context_idx;
+	action[len++] = info->engine_class;
+	action[len++] = info->engine_submit_mask;
+	action[len++] = info->wq_desc_lo;
+	action[len++] = info->wq_desc_hi;
+	action[len++] = info->wq_base_lo;
+	action[len++] = info->wq_base_hi;
+	action[len++] = info->wq_size;
+	action[len++] = q->width;
+	action[len++] = info->hwlrca_lo;
+	action[len++] = info->hwlrca_hi;
+
+	for (i = 1; i < q->width; ++i) {
+		struct xe_lrc *lrc = q->lrc + i;
+
+		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
+		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
+	}
+
+	xe_assert(xe, len <= MAX_MLRC_REG_SIZE);
+#undef MAX_MLRC_REG_SIZE
+
+	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
+}
+
+static void __register_engine(struct xe_guc *guc,
+			      struct guc_ctxt_registration_info *info)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_REGISTER_CONTEXT,
+		info->flags,
+		info->context_idx,
+		info->engine_class,
+		info->engine_submit_mask,
+		info->wq_desc_lo,
+		info->wq_desc_hi,
+		info->wq_base_lo,
+		info->wq_base_hi,
+		info->wq_size,
+		info->hwlrca_lo,
+		info->hwlrca_hi,
+	};
+
+	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+static void register_engine(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_lrc *lrc = q->lrc;
+	struct guc_ctxt_registration_info info;
+
+	xe_assert(xe, !exec_queue_registered(q));
+
+	memset(&info, 0, sizeof(info));
+	info.context_idx = q->guc->id;
+	info.engine_class = xe_engine_class_to_guc_class(q->class);
+	info.engine_submit_mask = q->logical_mask;
+	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
+	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
+	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
+
+	if (xe_exec_queue_is_parallel(q)) {
+		u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
+		struct iosys_map map = xe_lrc_parallel_map(lrc);
+
+		info.wq_desc_lo = lower_32_bits(ggtt_addr +
+			offsetof(struct guc_submit_parallel_scratch, wq_desc));
+		info.wq_desc_hi = upper_32_bits(ggtt_addr +
+			offsetof(struct guc_submit_parallel_scratch, wq_desc));
+		info.wq_base_lo = lower_32_bits(ggtt_addr +
+			offsetof(struct guc_submit_parallel_scratch, wq[0]));
+		info.wq_base_hi = upper_32_bits(ggtt_addr +
+			offsetof(struct guc_submit_parallel_scratch, wq[0]));
+		info.wq_size = WQ_SIZE;
+
+		q->guc->wqi_head = 0;
+		q->guc->wqi_tail = 0;
+		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
+		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
+	}
+
+	/*
+	 * We must keep a reference for LR engines if engine is registered with
+	 * the GuC as jobs signal immediately and can't destroy an engine if the
+	 * GuC has a reference to it.
+	 */
+	if (xe_exec_queue_is_lr(q))
+		xe_exec_queue_get(q);
+
+	set_exec_queue_registered(q);
+	trace_xe_exec_queue_register(q);
+	if (xe_exec_queue_is_parallel(q))
+		__register_mlrc_engine(guc, q, &info);
+	else
+		__register_engine(guc, &info);
+	init_policies(guc, q);
+}
+
+static u32 wq_space_until_wrap(struct xe_exec_queue *q)
+{
+	return (WQ_SIZE - q->guc->wqi_tail);
+}
+
+static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
+	unsigned int sleep_period_ms = 1;
+
+#define AVAILABLE_SPACE \
+	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
+	if (wqi_size > AVAILABLE_SPACE) {
+try_again:
+		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
+		if (wqi_size > AVAILABLE_SPACE) {
+			if (sleep_period_ms == 1024) {
+				xe_gt_reset_async(q->gt);
+				return -ENODEV;
+			}
+
+			msleep(sleep_period_ms);
+			sleep_period_ms <<= 1;
+			goto try_again;
+		}
+	}
+#undef AVAILABLE_SPACE
+
+	return 0;
+}
+
+static int wq_noop_append(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
+	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;
+
+	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
+		return -ENODEV;
+
+	xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw));
+
+	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
+		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
+		       FIELD_PREP(WQ_LEN_MASK, len_dw));
+	q->guc->wqi_tail = 0;
+
+	return 0;
+}
+
+static void wq_item_append(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
+#define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
+	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
+	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
+	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
+	int i = 0, j;
+
+	if (wqi_size > wq_space_until_wrap(q)) {
+		if (wq_noop_append(q))
+			return;
+	}
+	if (wq_wait_for_space(q, wqi_size))
+		return;
+
+	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
+		FIELD_PREP(WQ_LEN_MASK, len_dw);
+	wqi[i++] = xe_lrc_descriptor(q->lrc);
+	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
+		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64));
+	wqi[i++] = 0;
+	for (j = 1; j < q->width; ++j) {
+		struct xe_lrc *lrc = q->lrc + j;
+
+		wqi[i++] = lrc->ring.tail / sizeof(u64);
+	}
+
+	xe_assert(xe, i == wqi_size / sizeof(u32));
+
+	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
+				      wq[q->guc->wqi_tail / sizeof(u32)]));
+	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
+	q->guc->wqi_tail += wqi_size;
+	xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE);
+
+	xe_device_wmb(xe);
+
+	map = xe_lrc_parallel_map(q->lrc);
+	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
+}
+
+#define RESUME_PENDING	~0x0ull
+static void submit_exec_queue(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_lrc *lrc = q->lrc;
+	u32 action[3];
+	u32 g2h_len = 0;
+	u32 num_g2h = 0;
+	int len = 0;
+	bool extra_submit = false;
+
+	xe_assert(xe, exec_queue_registered(q));
+
+	if (xe_exec_queue_is_parallel(q))
+		wq_item_append(q);
+	else
+		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+
+	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
+		return;
+
+	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
+		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
+		action[len++] = q->guc->id;
+		action[len++] = GUC_CONTEXT_ENABLE;
+		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
+		num_g2h = 1;
+		if (xe_exec_queue_is_parallel(q))
+			extra_submit = true;
+
+		q->guc->resume_time = RESUME_PENDING;
+		set_exec_queue_pending_enable(q);
+		set_exec_queue_enabled(q);
+		trace_xe_exec_queue_scheduling_enable(q);
+	} else {
+		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
+		action[len++] = q->guc->id;
+		trace_xe_exec_queue_submit(q);
+	}
+
+	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
+
+	if (extra_submit) {
+		len = 0;
+		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
+		action[len++] = q->guc->id;
+		trace_xe_exec_queue_submit(q);
+
+		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
+	}
+}
+
+static struct dma_fence *
+guc_exec_queue_run_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+	struct xe_exec_queue *q = job->q;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	bool lr = xe_exec_queue_is_lr(q);
+
+	xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
+		  exec_queue_banned(q) || exec_queue_suspended(q));
+
+	trace_xe_sched_job_run(job);
+
+	if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) {
+		if (!exec_queue_registered(q))
+			register_engine(q);
+		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
+			q->ring_ops->emit_job(job);
+		submit_exec_queue(q);
+	}
+
+	if (lr) {
+		xe_sched_job_set_error(job, -EOPNOTSUPP);
+		return NULL;
+	} else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) {
+		return job->fence;
+	} else {
+		return dma_fence_get(job->fence);
+	}
+}
+
+static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+
+	trace_xe_sched_job_free(job);
+	xe_sched_job_put(job);
+}
+
+static int guc_read_stopped(struct xe_guc *guc)
+{
+	return atomic_read(&guc->submission_state.stopped);
+}
+
+#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
+	u32 action[] = {						\
+		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
+		q->guc->id,						\
+		GUC_CONTEXT_##enable_disable,				\
+	}
+
+static void disable_scheduling_deregister(struct xe_guc *guc,
+					  struct xe_exec_queue *q)
+{
+	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
+	struct xe_device *xe = guc_to_xe(guc);
+	int ret;
+
+	set_min_preemption_timeout(guc, q);
+	smp_rmb();
+	ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
+				 guc_read_stopped(guc), HZ * 5);
+	if (!ret) {
+		struct xe_gpu_scheduler *sched = &q->guc->sched;
+
+		drm_warn(&xe->drm, "Pending enable failed to respond");
+		xe_sched_submission_start(sched);
+		xe_gt_reset_async(q->gt);
+		xe_sched_tdr_queue_imm(sched);
+		return;
+	}
+
+	clear_exec_queue_enabled(q);
+	set_exec_queue_pending_disable(q);
+	set_exec_queue_destroyed(q);
+	trace_xe_exec_queue_scheduling_disable(q);
+
+	/*
+	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
+	 * handler and we are not allowed to reserved G2H space in handlers.
+	 */
+	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
+		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
+}
+
+static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p);
+
+#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
+static void simple_error_capture(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct drm_printer p = drm_err_printer("");
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	u32 adj_logical_mask = q->logical_mask;
+	u32 width_mask = (0x1 << q->width) - 1;
+	int i;
+	bool cookie;
+
+	if (q->vm && !q->vm->error_capture.capture_once) {
+		q->vm->error_capture.capture_once = true;
+		cookie = dma_fence_begin_signalling();
+		for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
+			if (adj_logical_mask & BIT(i)) {
+				adj_logical_mask |= width_mask << i;
+				i += q->width;
+			} else {
+				++i;
+			}
+		}
+
+		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+		xe_guc_ct_print(&guc->ct, &p, true);
+		guc_exec_queue_print(q, &p);
+		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
+			if (hwe->class != q->hwe->class ||
+			    !(BIT(hwe->logical_instance) & adj_logical_mask))
+				continue;
+			xe_hw_engine_print(hwe, &p);
+		}
+		xe_analyze_vm(&p, q->vm, q->gt->info.id);
+		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+		dma_fence_end_signalling(cookie);
+	}
+}
+#else
+static void simple_error_capture(struct xe_exec_queue *q)
+{
+}
+#endif
+
+static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	/** to wakeup xe_wait_user_fence ioctl if exec queue is reset */
+	wake_up_all(&xe->ufence_wq);
+
+	if (xe_exec_queue_is_lr(q))
+		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
+	else
+		xe_sched_tdr_queue_imm(&q->guc->sched);
+}
+
+static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
+{
+	struct xe_guc_exec_queue *ge =
+		container_of(w, struct xe_guc_exec_queue, lr_tdr);
+	struct xe_exec_queue *q = ge->q;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gpu_scheduler *sched = &ge->sched;
+
+	xe_assert(xe, xe_exec_queue_is_lr(q));
+	trace_xe_exec_queue_lr_cleanup(q);
+
+	/* Kill the run_job / process_msg entry points */
+	xe_sched_submission_stop(sched);
+
+	/*
+	 * Engine state now mostly stable, disable scheduling / deregister if
+	 * needed. This cleanup routine might be called multiple times, where
+	 * the actual async engine deregister drops the final engine ref.
+	 * Calling disable_scheduling_deregister will mark the engine as
+	 * destroyed and fire off the CT requests to disable scheduling /
+	 * deregister, which we only want to do once. We also don't want to mark
+	 * the engine as pending_disable again as this may race with the
+	 * xe_guc_deregister_done_handler() which treats it as an unexpected
+	 * state.
+	 */
+	if (exec_queue_registered(q) && !exec_queue_destroyed(q)) {
+		struct xe_guc *guc = exec_queue_to_guc(q);
+		int ret;
+
+		set_exec_queue_banned(q);
+		disable_scheduling_deregister(guc, q);
+
+		/*
+		 * Must wait for scheduling to be disabled before signalling
+		 * any fences, if GT broken the GT reset code should signal us.
+		 */
+		ret = wait_event_timeout(guc->ct.wq,
+					 !exec_queue_pending_disable(q) ||
+					 guc_read_stopped(guc), HZ * 5);
+		if (!ret) {
+			drm_warn(&xe->drm, "Schedule disable failed to respond");
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(q->gt);
+			return;
+		}
+	}
+
+	xe_sched_submission_start(sched);
+}
+
+static enum drm_gpu_sched_stat
+guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
+{
+	struct xe_sched_job *job = to_xe_sched_job(drm_job);
+	struct xe_sched_job *tmp_job;
+	struct xe_exec_queue *q = job->q;
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q));
+	int err = -ETIME;
+	int i = 0;
+
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
+		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
+		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)));
+
+		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
+			   xe_sched_job_seqno(job), q->guc->id, q->flags);
+		simple_error_capture(q);
+		xe_devcoredump(q);
+	} else {
+		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
+			 xe_sched_job_seqno(job), q->guc->id, q->flags);
+	}
+	trace_xe_sched_job_timedout(job);
+
+	/* Kill the run_job entry point */
+	xe_sched_submission_stop(sched);
+
+	/*
+	 * Kernel jobs should never fail, nor should VM jobs if they do
+	 * somethings has gone wrong and the GT needs a reset
+	 */
+	if (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
+	    (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) {
+		if (!xe_sched_invalidate_job(job, 2)) {
+			xe_sched_add_pending_job(sched, job);
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(q->gt);
+			goto out;
+		}
+	}
+
+	/* Engine state now stable, disable scheduling if needed */
+	if (exec_queue_registered(q)) {
+		struct xe_guc *guc = exec_queue_to_guc(q);
+		int ret;
+
+		if (exec_queue_reset(q))
+			err = -EIO;
+		set_exec_queue_banned(q);
+		if (!exec_queue_destroyed(q)) {
+			xe_exec_queue_get(q);
+			disable_scheduling_deregister(guc, q);
+		}
+
+		/*
+		 * Must wait for scheduling to be disabled before signalling
+		 * any fences, if GT broken the GT reset code should signal us.
+		 *
+		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
+		 * error) messages which can cause the schedule disable to get
+		 * lost. If this occurs, trigger a GT reset to recover.
+		 */
+		smp_rmb();
+		ret = wait_event_timeout(guc->ct.wq,
+					 !exec_queue_pending_disable(q) ||
+					 guc_read_stopped(guc), HZ * 5);
+		if (!ret || guc_read_stopped(guc)) {
+			drm_warn(&xe->drm, "Schedule disable failed to respond");
+			xe_sched_add_pending_job(sched, job);
+			xe_sched_submission_start(sched);
+			xe_gt_reset_async(q->gt);
+			xe_sched_tdr_queue_imm(sched);
+			goto out;
+		}
+	}
+
+	/* Stop fence signaling */
+	xe_hw_fence_irq_stop(q->fence_irq);
+
+	/*
+	 * Fence state now stable, stop / start scheduler which cleans up any
+	 * fences that are complete
+	 */
+	xe_sched_add_pending_job(sched, job);
+	xe_sched_submission_start(sched);
+	xe_guc_exec_queue_trigger_cleanup(q);
+
+	/* Mark all outstanding jobs as bad, thus completing them */
+	spin_lock(&sched->base.job_list_lock);
+	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
+		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
+	spin_unlock(&sched->base.job_list_lock);
+
+	/* Start fence signaling */
+	xe_hw_fence_irq_start(q->fence_irq);
+
+out:
+	return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+static void __guc_exec_queue_fini_async(struct work_struct *w)
+{
+	struct xe_guc_exec_queue *ge =
+		container_of(w, struct xe_guc_exec_queue, fini_async);
+	struct xe_exec_queue *q = ge->q;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	trace_xe_exec_queue_destroy(q);
+
+	if (xe_exec_queue_is_lr(q))
+		cancel_work_sync(&ge->lr_tdr);
+	if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT)
+		xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q);
+	release_guc_id(guc, q);
+	xe_sched_entity_fini(&ge->entity);
+	xe_sched_fini(&ge->sched);
+
+	kfree(ge);
+	xe_exec_queue_fini(q);
+}
+
+static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
+{
+	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
+
+	/* We must block on kernel engines so slabs are empty on driver unload */
+	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT)
+		__guc_exec_queue_fini_async(&q->guc->fini_async);
+	else
+		queue_work(system_wq, &q->guc->fini_async);
+}
+
+static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	/*
+	 * Might be done from within the GPU scheduler, need to do async as we
+	 * fini the scheduler when the engine is fini'd, the scheduler can't
+	 * complete fini within itself (circular dependency). Async resolves
+	 * this we and don't really care when everything is fini'd, just that it
+	 * is.
+	 */
+	guc_exec_queue_fini_async(q);
+}
+
+static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
+{
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
+	trace_xe_exec_queue_cleanup_entity(q);
+
+	if (exec_queue_registered(q))
+		disable_scheduling_deregister(guc, q);
+	else
+		__guc_exec_queue_fini(guc, q);
+}
+
+static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
+{
+	return !exec_queue_killed_or_banned(q) && exec_queue_registered(q);
+}
+
+static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
+{
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	if (guc_exec_queue_allowed_to_change_state(q))
+		init_policies(guc, q);
+	kfree(msg);
+}
+
+static void suspend_fence_signal(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
+		  guc_read_stopped(guc));
+	xe_assert(xe, q->guc->suspend_pending);
+
+	q->guc->suspend_pending = false;
+	smp_wmb();
+	wake_up(&q->guc->suspend_wait);
+}
+
+static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
+{
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
+	    exec_queue_enabled(q)) {
+		wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
+			   guc_read_stopped(guc));
+
+		if (!guc_read_stopped(guc)) {
+			MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
+			s64 since_resume_ms =
+				ktime_ms_delta(ktime_get(),
+					       q->guc->resume_time);
+			s64 wait_ms = q->vm->preempt.min_run_period_ms -
+				since_resume_ms;
+
+			if (wait_ms > 0 && q->guc->resume_time)
+				msleep(wait_ms);
+
+			set_exec_queue_suspended(q);
+			clear_exec_queue_enabled(q);
+			set_exec_queue_pending_disable(q);
+			trace_xe_exec_queue_scheduling_disable(q);
+
+			xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+				       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
+		}
+	} else if (q->guc->suspend_pending) {
+		set_exec_queue_suspended(q);
+		suspend_fence_signal(q);
+	}
+}
+
+static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
+{
+	struct xe_exec_queue *q = msg->private_data;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	if (guc_exec_queue_allowed_to_change_state(q)) {
+		MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
+
+		q->guc->resume_time = RESUME_PENDING;
+		clear_exec_queue_suspended(q);
+		set_exec_queue_pending_enable(q);
+		set_exec_queue_enabled(q);
+		trace_xe_exec_queue_scheduling_enable(q);
+
+		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
+	} else {
+		clear_exec_queue_suspended(q);
+	}
+}
+
+#define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
+#define SET_SCHED_PROPS	2
+#define SUSPEND		3
+#define RESUME		4
+
+static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
+{
+	trace_xe_sched_msg_recv(msg);
+
+	switch (msg->opcode) {
+	case CLEANUP:
+		__guc_exec_queue_process_msg_cleanup(msg);
+		break;
+	case SET_SCHED_PROPS:
+		__guc_exec_queue_process_msg_set_sched_props(msg);
+		break;
+	case SUSPEND:
+		__guc_exec_queue_process_msg_suspend(msg);
+		break;
+	case RESUME:
+		__guc_exec_queue_process_msg_resume(msg);
+		break;
+	default:
+		XE_WARN_ON("Unknown message type");
+	}
+}
+
+static const struct drm_sched_backend_ops drm_sched_ops = {
+	.run_job = guc_exec_queue_run_job,
+	.free_job = guc_exec_queue_free_job,
+	.timedout_job = guc_exec_queue_timedout_job,
+};
+
+static const struct xe_sched_backend_ops xe_sched_ops = {
+	.process_msg = guc_exec_queue_process_msg,
+};
+
+static int guc_exec_queue_init(struct xe_exec_queue *q)
+{
+	struct xe_gpu_scheduler *sched;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_guc_exec_queue *ge;
+	long timeout;
+	int err;
+
+	xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));
+
+	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
+	if (!ge)
+		return -ENOMEM;
+
+	q->guc = ge;
+	ge->q = q;
+	init_waitqueue_head(&ge->suspend_wait);
+
+	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
+		  q->hwe->eclass->sched_props.job_timeout_ms;
+	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
+			    get_submit_wq(guc),
+			    q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64,
+			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
+			    q->name, gt_to_xe(q->gt)->drm.dev);
+	if (err)
+		goto err_free;
+
+	sched = &ge->sched;
+	err = xe_sched_entity_init(&ge->entity, sched);
+	if (err)
+		goto err_sched;
+	q->priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
+
+	if (xe_exec_queue_is_lr(q))
+		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);
+
+	mutex_lock(&guc->submission_state.lock);
+
+	err = alloc_guc_id(guc, q);
+	if (err)
+		goto err_entity;
+
+	q->entity = &ge->entity;
+
+	if (guc_read_stopped(guc))
+		xe_sched_stop(sched);
+
+	mutex_unlock(&guc->submission_state.lock);
+
+	xe_exec_queue_assign_name(q, q->guc->id);
+
+	trace_xe_exec_queue_create(q);
+
+	return 0;
+
+err_entity:
+	xe_sched_entity_fini(&ge->entity);
+err_sched:
+	xe_sched_fini(&ge->sched);
+err_free:
+	kfree(ge);
+
+	return err;
+}
+
+static void guc_exec_queue_kill(struct xe_exec_queue *q)
+{
+	trace_xe_exec_queue_kill(q);
+	set_exec_queue_killed(q);
+	xe_guc_exec_queue_trigger_cleanup(q);
+}
+
+static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
+				   u32 opcode)
+{
+	INIT_LIST_HEAD(&msg->link);
+	msg->opcode = opcode;
+	msg->private_data = q;
+
+	trace_xe_sched_msg_add(msg);
+	xe_sched_add_msg(&q->guc->sched, msg);
+}
+
+#define STATIC_MSG_CLEANUP	0
+#define STATIC_MSG_SUSPEND	1
+#define STATIC_MSG_RESUME	2
+static void guc_exec_queue_fini(struct xe_exec_queue *q)
+{
+	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
+
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT))
+		guc_exec_queue_add_msg(q, msg, CLEANUP);
+	else
+		__guc_exec_queue_fini(exec_queue_to_guc(q), q);
+}
+
+static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
+				       enum xe_exec_queue_priority priority)
+{
+	struct xe_sched_msg *msg;
+
+	if (q->priority == priority || exec_queue_killed_or_banned(q))
+		return 0;
+
+	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
+	q->priority = priority;
+
+	return 0;
+}
+
+static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
+{
+	struct xe_sched_msg *msg;
+
+	if (q->sched_props.timeslice_us == timeslice_us ||
+	    exec_queue_killed_or_banned(q))
+		return 0;
+
+	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	q->sched_props.timeslice_us = timeslice_us;
+	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
+
+	return 0;
+}
+
+static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
+					      u32 preempt_timeout_us)
+{
+	struct xe_sched_msg *msg;
+
+	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
+	    exec_queue_killed_or_banned(q))
+		return 0;
+
+	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	q->sched_props.preempt_timeout_us = preempt_timeout_us;
+	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
+
+	return 0;
+}
+
+static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms)
+{
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, !exec_queue_registered(q));
+	xe_assert(xe, !exec_queue_banned(q));
+	xe_assert(xe, !exec_queue_killed(q));
+
+	sched->base.timeout = job_timeout_ms;
+
+	return 0;
+}
+
+static int guc_exec_queue_suspend(struct xe_exec_queue *q)
+{
+	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
+
+	if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending)
+		return -EINVAL;
+
+	q->guc->suspend_pending = true;
+	guc_exec_queue_add_msg(q, msg, SUSPEND);
+
+	return 0;
+}
+
+static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	wait_event(q->guc->suspend_wait, !q->guc->suspend_pending ||
+		   guc_read_stopped(guc));
+}
+
+static void guc_exec_queue_resume(struct xe_exec_queue *q)
+{
+	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, !q->guc->suspend_pending);
+
+	guc_exec_queue_add_msg(q, msg, RESUME);
+}
+
+static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
+{
+	return exec_queue_reset(q);
+}
+
+/*
+ * All of these functions are an abstraction layer which other parts of XE can
+ * use to trap into the GuC backend. All of these functions, aside from init,
+ * really shouldn't do much other than trap into the DRM scheduler which
+ * synchronizes these operations.
+ */
+static const struct xe_exec_queue_ops guc_exec_queue_ops = {
+	.init = guc_exec_queue_init,
+	.kill = guc_exec_queue_kill,
+	.fini = guc_exec_queue_fini,
+	.set_priority = guc_exec_queue_set_priority,
+	.set_timeslice = guc_exec_queue_set_timeslice,
+	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
+	.set_job_timeout = guc_exec_queue_set_job_timeout,
+	.suspend = guc_exec_queue_suspend,
+	.suspend_wait = guc_exec_queue_suspend_wait,
+	.resume = guc_exec_queue_resume,
+	.reset_status = guc_exec_queue_reset_status,
+};
+
+static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+
+	/* Stop scheduling + flush any DRM scheduler operations */
+	xe_sched_submission_stop(sched);
+
+	/* Clean up lost G2H + reset engine state */
+	if (exec_queue_registered(q)) {
+		if ((exec_queue_banned(q) && exec_queue_destroyed(q)) ||
+		    xe_exec_queue_is_lr(q))
+			xe_exec_queue_put(q);
+		else if (exec_queue_destroyed(q))
+			__guc_exec_queue_fini(guc, q);
+	}
+	if (q->guc->suspend_pending) {
+		set_exec_queue_suspended(q);
+		suspend_fence_signal(q);
+	}
+	atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
+		   &q->guc->state);
+	q->guc->resume_time = 0;
+	trace_xe_exec_queue_stop(q);
+
+	/*
+	 * Ban any engine (aside from kernel and engines used for VM ops) with a
+	 * started but not complete job or if a job has gone through a GT reset
+	 * more than twice.
+	 */
+	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
+		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
+
+		if (job) {
+			if ((xe_sched_job_started(job) &&
+			    !xe_sched_job_completed(job)) ||
+			    xe_sched_invalidate_job(job, 2)) {
+				trace_xe_sched_job_ban(job);
+				xe_sched_tdr_queue_imm(&q->guc->sched);
+				set_exec_queue_banned(q);
+			}
+		}
+	}
+}
+
+int xe_guc_submit_reset_prepare(struct xe_guc *guc)
+{
+	int ret;
+
+	/*
+	 * Using an atomic here rather than submission_state.lock as this
+	 * function can be called while holding the CT lock (engine reset
+	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
+	 * Atomic is not ideal, but it works to prevent against concurrent reset
+	 * and releasing any TDRs waiting on guc->submission_state.stopped.
+	 */
+	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
+	smp_wmb();
+	wake_up_all(&guc->ct.wq);
+
+	return ret;
+}
+
+void xe_guc_submit_reset_wait(struct xe_guc *guc)
+{
+	wait_event(guc->ct.wq, !guc_read_stopped(guc));
+}
+
+int xe_guc_submit_stop(struct xe_guc *guc)
+{
+	struct xe_exec_queue *q;
+	unsigned long index;
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, guc_read_stopped(guc) == 1);
+
+	mutex_lock(&guc->submission_state.lock);
+
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+		guc_exec_queue_stop(guc, q);
+
+	mutex_unlock(&guc->submission_state.lock);
+
+	/*
+	 * No one can enter the backend at this point, aside from new engine
+	 * creation which is protected by guc->submission_state.lock.
+	 */
+
+	return 0;
+}
+
+static void guc_exec_queue_start(struct xe_exec_queue *q)
+{
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+
+	if (!exec_queue_killed_or_banned(q)) {
+		int i;
+
+		trace_xe_exec_queue_resubmit(q);
+		for (i = 0; i < q->width; ++i)
+			xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail);
+		xe_sched_resubmit_jobs(sched);
+	}
+
+	xe_sched_submission_start(sched);
+}
+
+int xe_guc_submit_start(struct xe_guc *guc)
+{
+	struct xe_exec_queue *q;
+	unsigned long index;
+	struct xe_device *xe = guc_to_xe(guc);
+
+	xe_assert(xe, guc_read_stopped(guc) == 1);
+
+	mutex_lock(&guc->submission_state.lock);
+	atomic_dec(&guc->submission_state.stopped);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+		guc_exec_queue_start(q);
+	mutex_unlock(&guc->submission_state.lock);
+
+	wake_up_all(&guc->ct.wq);
+
+	return 0;
+}
+
+static struct xe_exec_queue *
+g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_exec_queue *q;
+
+	if (unlikely(guc_id >= GUC_ID_MAX)) {
+		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
+		return NULL;
+	}
+
+	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
+	if (unlikely(!q)) {
+		drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id);
+		return NULL;
+	}
+
+	xe_assert(xe, guc_id >= q->guc->id);
+	xe_assert(xe, guc_id < (q->guc->id + q->width));
+
+	return q;
+}
+
+static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+	u32 action[] = {
+		XE_GUC_ACTION_DEREGISTER_CONTEXT,
+		q->guc->id,
+	};
+
+	trace_xe_exec_queue_deregister(q);
+
+	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_exec_queue *q;
+	u32 guc_id = msg[0];
+
+	if (unlikely(len < 2)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
+		return -EPROTO;
+
+	if (unlikely(!exec_queue_pending_enable(q) &&
+		     !exec_queue_pending_disable(q))) {
+		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
+			atomic_read(&q->guc->state));
+		return -EPROTO;
+	}
+
+	trace_xe_exec_queue_scheduling_done(q);
+
+	if (exec_queue_pending_enable(q)) {
+		q->guc->resume_time = ktime_get();
+		clear_exec_queue_pending_enable(q);
+		smp_wmb();
+		wake_up_all(&guc->ct.wq);
+	} else {
+		clear_exec_queue_pending_disable(q);
+		if (q->guc->suspend_pending) {
+			suspend_fence_signal(q);
+		} else {
+			if (exec_queue_banned(q)) {
+				smp_wmb();
+				wake_up_all(&guc->ct.wq);
+			}
+			deregister_exec_queue(guc, q);
+		}
+	}
+
+	return 0;
+}
+
+int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_exec_queue *q;
+	u32 guc_id = msg[0];
+
+	if (unlikely(len < 1)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
+		return -EPROTO;
+
+	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
+	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
+		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
+			atomic_read(&q->guc->state));
+		return -EPROTO;
+	}
+
+	trace_xe_exec_queue_deregister_done(q);
+
+	clear_exec_queue_registered(q);
+
+	if (exec_queue_banned(q) || xe_exec_queue_is_lr(q))
+		xe_exec_queue_put(q);
+	else
+		__guc_exec_queue_fini(guc, q);
+
+	return 0;
+}
+
+int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_exec_queue *q;
+	u32 guc_id = msg[0];
+
+	if (unlikely(len < 1)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
+		return -EPROTO;
+
+	drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id);
+
+	/* FIXME: Do error capture, most likely async */
+
+	trace_xe_exec_queue_reset(q);
+
+	/*
+	 * A banned engine is a NOP at this point (came from
+	 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel
+	 * jobs by setting timeout of the job to the minimum value kicking
+	 * guc_exec_queue_timedout_job.
+	 */
+	set_exec_queue_reset(q);
+	if (!exec_queue_banned(q))
+		xe_guc_exec_queue_trigger_cleanup(q);
+
+	return 0;
+}
+
+int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
+					       u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_exec_queue *q;
+	u32 guc_id = msg[0];
+
+	if (unlikely(len < 1)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
+		return -EPROTO;
+
+	drm_dbg(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
+	trace_xe_exec_queue_memory_cat_error(q);
+
+	/* Treat the same as engine reset */
+	set_exec_queue_reset(q);
+	if (!exec_queue_banned(q))
+		xe_guc_exec_queue_trigger_cleanup(q);
+
+	return 0;
+}
+
+int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	u8 guc_class, instance;
+	u32 reason;
+
+	if (unlikely(len != 3)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	guc_class = msg[0];
+	instance = msg[1];
+	reason = msg[2];
+
+	/* Unexpected failure of a hardware feature, log an actual error */
+	drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
+		guc_class, instance, reason);
+
+	xe_gt_reset_async(guc_to_gt(guc));
+
+	return 0;
+}
+
+static void
+guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
+				   struct xe_guc_submit_exec_queue_snapshot *snapshot)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
+	int i;
+
+	snapshot->guc.wqi_head = q->guc->wqi_head;
+	snapshot->guc.wqi_tail = q->guc->wqi_tail;
+	snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
+	snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
+	snapshot->parallel.wq_desc.status = parallel_read(xe, map,
+							  wq_desc.wq_status);
+
+	if (snapshot->parallel.wq_desc.head !=
+	    snapshot->parallel.wq_desc.tail) {
+		for (i = snapshot->parallel.wq_desc.head;
+		     i != snapshot->parallel.wq_desc.tail;
+		     i = (i + sizeof(u32)) % WQ_SIZE)
+			snapshot->parallel.wq[i / sizeof(u32)] =
+				parallel_read(xe, map, wq[i / sizeof(u32)]);
+	}
+}
+
+static void
+guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
+				 struct drm_printer *p)
+{
+	int i;
+
+	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
+		   snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
+	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
+		   snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
+	drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
+
+	if (snapshot->parallel.wq_desc.head !=
+	    snapshot->parallel.wq_desc.tail) {
+		for (i = snapshot->parallel.wq_desc.head;
+		     i != snapshot->parallel.wq_desc.tail;
+		     i = (i + sizeof(u32)) % WQ_SIZE)
+			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
+				   snapshot->parallel.wq[i / sizeof(u32)]);
+	}
+}
+
+/**
+ * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
+ * @q: Xe exec queue.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * Returns: a GuC Submit Engine snapshot object that must be freed by the
+ * caller, using `xe_guc_exec_queue_snapshot_free`.
+ */
+struct xe_guc_submit_exec_queue_snapshot *
+xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gpu_scheduler *sched = &q->guc->sched;
+	struct xe_sched_job *job;
+	struct xe_guc_submit_exec_queue_snapshot *snapshot;
+	int i;
+
+	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
+
+	if (!snapshot) {
+		drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n");
+		return NULL;
+	}
+
+	snapshot->guc.id = q->guc->id;
+	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
+	snapshot->class = q->class;
+	snapshot->logical_mask = q->logical_mask;
+	snapshot->width = q->width;
+	snapshot->refcount = kref_read(&q->refcount);
+	snapshot->sched_timeout = sched->base.timeout;
+	snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
+	snapshot->sched_props.preempt_timeout_us =
+		q->sched_props.preempt_timeout_us;
+
+	snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot),
+				      GFP_ATOMIC);
+
+	if (!snapshot->lrc) {
+		drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n");
+	} else {
+		for (i = 0; i < q->width; ++i) {
+			struct xe_lrc *lrc = q->lrc + i;
+
+			snapshot->lrc[i].context_desc =
+				lower_32_bits(xe_lrc_ggtt_addr(lrc));
+			snapshot->lrc[i].head = xe_lrc_ring_head(lrc);
+			snapshot->lrc[i].tail.internal = lrc->ring.tail;
+			snapshot->lrc[i].tail.memory =
+				xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);
+			snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc);
+			snapshot->lrc[i].seqno = xe_lrc_seqno(lrc);
+		}
+	}
+
+	snapshot->schedule_state = atomic_read(&q->guc->state);
+	snapshot->exec_queue_flags = q->flags;
+
+	snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
+	if (snapshot->parallel_execution)
+		guc_exec_queue_wq_snapshot_capture(q, snapshot);
+
+	spin_lock(&sched->base.job_list_lock);
+	snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
+	snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
+					       sizeof(struct pending_list_snapshot),
+					       GFP_ATOMIC);
+
+	if (!snapshot->pending_list) {
+		drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n");
+	} else {
+		i = 0;
+		list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+			snapshot->pending_list[i].seqno =
+				xe_sched_job_seqno(job);
+			snapshot->pending_list[i].fence =
+				dma_fence_is_signaled(job->fence) ? 1 : 0;
+			snapshot->pending_list[i].finished =
+				dma_fence_is_signaled(&job->drm.s_fence->finished)
+				? 1 : 0;
+			i++;
+		}
+	}
+
+	spin_unlock(&sched->base.job_list_lock);
+
+	return snapshot;
+}
+
+/**
+ * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
+ * @snapshot: GuC Submit Engine snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given GuC Submit Engine snapshot object.
+ */
+void
+xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
+				 struct drm_printer *p)
+{
+	int i;
+
+	if (!snapshot)
+		return;
+
+	drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
+	drm_printf(p, "\tName: %s\n", snapshot->name);
+	drm_printf(p, "\tClass: %d\n", snapshot->class);
+	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
+	drm_printf(p, "\tWidth: %d\n", snapshot->width);
+	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
+	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
+	drm_printf(p, "\tTimeslice: %u (us)\n",
+		   snapshot->sched_props.timeslice_us);
+	drm_printf(p, "\tPreempt timeout: %u (us)\n",
+		   snapshot->sched_props.preempt_timeout_us);
+
+	for (i = 0; snapshot->lrc && i < snapshot->width; ++i) {
+		drm_printf(p, "\tHW Context Desc: 0x%08x\n",
+			   snapshot->lrc[i].context_desc);
+		drm_printf(p, "\tLRC Head: (memory) %u\n",
+			   snapshot->lrc[i].head);
+		drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
+			   snapshot->lrc[i].tail.internal,
+			   snapshot->lrc[i].tail.memory);
+		drm_printf(p, "\tStart seqno: (memory) %d\n",
+			   snapshot->lrc[i].start_seqno);
+		drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno);
+	}
+	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
+	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
+
+	if (snapshot->parallel_execution)
+		guc_exec_queue_wq_snapshot_print(snapshot, p);
+
+	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
+	     i++)
+		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
+			   snapshot->pending_list[i].seqno,
+			   snapshot->pending_list[i].fence,
+			   snapshot->pending_list[i].finished);
+}
+
+/**
+ * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
+ * snapshot.
+ * @snapshot: GuC Submit Engine snapshot object.
+ *
+ * This function free all the memory that needed to be allocated at capture
+ * time.
+ */
+void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
+{
+	if (!snapshot)
+		return;
+
+	kfree(snapshot->lrc);
+	kfree(snapshot->pending_list);
+	kfree(snapshot);
+}
+
+static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
+{
+	struct xe_guc_submit_exec_queue_snapshot *snapshot;
+
+	snapshot = xe_guc_exec_queue_snapshot_capture(q);
+	xe_guc_exec_queue_snapshot_print(snapshot, p);
+	xe_guc_exec_queue_snapshot_free(snapshot);
+}
+
+/**
+ * xe_guc_submit_print - GuC Submit Print.
+ * @guc: GuC.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function capture and prints snapshots of **all** GuC Engines.
+ */
+void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
+{
+	struct xe_exec_queue *q;
+	unsigned long index;
+
+	if (!xe_device_uc_enabled(guc_to_xe(guc)))
+		return;
+
+	mutex_lock(&guc->submission_state.lock);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+		guc_exec_queue_print(q, p);
+	mutex_unlock(&guc->submission_state.lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
new file mode 100644
index 000000000000..fc97869c5b86
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_SUBMIT_H_
+#define _XE_GUC_SUBMIT_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_exec_queue;
+struct xe_guc;
+
+int xe_guc_submit_init(struct xe_guc *guc);
+
+int xe_guc_submit_reset_prepare(struct xe_guc *guc);
+void xe_guc_submit_reset_wait(struct xe_guc *guc);
+int xe_guc_submit_stop(struct xe_guc *guc);
+int xe_guc_submit_start(struct xe_guc *guc);
+
+int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
+					       u32 len);
+int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+struct xe_guc_submit_exec_queue_snapshot *
+xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
+void
+xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
+				 struct drm_printer *p);
+void
+xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot);
+void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
new file mode 100644
index 000000000000..649b0a852692
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GUC_SUBMIT_TYPES_H_
+#define _XE_GUC_SUBMIT_TYPES_H_
+
+#include "xe_hw_engine_types.h"
+
+/* Work item for submitting workloads into work queue of GuC. */
+#define WQ_STATUS_ACTIVE		1
+#define WQ_STATUS_SUSPENDED		2
+#define WQ_STATUS_CMD_ERROR		3
+#define WQ_STATUS_ENGINE_ID_NOT_USED	4
+#define WQ_STATUS_SUSPENDED_FROM_RESET	5
+#define WQ_TYPE_NOOP			0x4
+#define WQ_TYPE_MULTI_LRC		0x5
+#define WQ_TYPE_MASK			GENMASK(7, 0)
+#define WQ_LEN_MASK			GENMASK(26, 16)
+
+#define WQ_GUC_ID_MASK			GENMASK(15, 0)
+#define WQ_RING_TAIL_MASK		GENMASK(28, 18)
+
+#define PARALLEL_SCRATCH_SIZE	2048
+#define WQ_SIZE			(PARALLEL_SCRATCH_SIZE / 2)
+#define WQ_OFFSET		(PARALLEL_SCRATCH_SIZE - WQ_SIZE)
+#define CACHELINE_BYTES		64
+
+struct guc_sched_wq_desc {
+	u32 head;
+	u32 tail;
+	u32 error_offset;
+	u32 wq_status;
+	u32 reserved[28];
+} __packed;
+
+struct sync_semaphore {
+	u32 semaphore;
+	u8 unused[CACHELINE_BYTES - sizeof(u32)];
+};
+
+/**
+ * struct guc_submit_parallel_scratch - A scratch shared mapped buffer.
+ */
+struct guc_submit_parallel_scratch {
+	/** @wq_desc: Guc scheduler workqueue descriptor */
+	struct guc_sched_wq_desc wq_desc;
+
+	/** @go: Go Semaphore */
+	struct sync_semaphore go;
+	/** @join: Joined semaphore for the relevant hw engine instances */
+	struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE];
+
+	/** @unused: Unused/Reserved memory space */
+	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
+		  sizeof(struct sync_semaphore) *
+		  (XE_HW_ENGINE_MAX_INSTANCE + 1)];
+
+	/** @wq: Workqueue info */
+	u32 wq[WQ_SIZE / sizeof(u32)];
+};
+
+struct lrc_snapshot {
+	u32 context_desc;
+	u32 head;
+	struct {
+		u32 internal;
+		u32 memory;
+	} tail;
+	u32 start_seqno;
+	u32 seqno;
+};
+
+struct pending_list_snapshot {
+	u32 seqno;
+	bool fence;
+	bool finished;
+};
+
+/**
+ * struct xe_guc_submit_exec_queue_snapshot - Snapshot for devcoredump
+ */
+struct xe_guc_submit_exec_queue_snapshot {
+	/** @name: name of this exec queue */
+	char name[MAX_FENCE_NAME_LEN];
+	/** @class: class of this exec queue */
+	enum xe_engine_class class;
+	/**
+	 * @logical_mask: logical mask of where job submitted to exec queue can run
+	 */
+	u32 logical_mask;
+	/** @width: width (number BB submitted per exec) of this exec queue */
+	u16 width;
+	/** @refcount: ref count of this exec queue */
+	u32 refcount;
+	/**
+	 * @sched_timeout: the time after which a job is removed from the
+	 * scheduler.
+	 */
+	long sched_timeout;
+
+	/** @sched_props: scheduling properties */
+	struct {
+		/** @timeslice_us: timeslice period in micro-seconds */
+		u32 timeslice_us;
+		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		u32 preempt_timeout_us;
+	} sched_props;
+
+	/** @lrc: LRC Snapshot */
+	struct lrc_snapshot *lrc;
+
+	/** @schedule_state: Schedule State at the moment of Crash */
+	u32 schedule_state;
+	/** @exec_queue_flags: Flags of the faulty exec_queue */
+	unsigned long exec_queue_flags;
+
+	/** @guc: GuC Engine Snapshot */
+	struct {
+		/** @wqi_head: work queue item head */
+		u32 wqi_head;
+		/** @wqi_tail: work queue item tail */
+		u32 wqi_tail;
+		/** @id: GuC id for this exec_queue */
+		u16 id;
+	} guc;
+
+	/**
+	 * @parallel_execution: Indication if the failure was during parallel
+	 * execution
+	 */
+	bool parallel_execution;
+	/** @parallel: snapshot of the useful parallel scratch */
+	struct {
+		/** @wq_desc: Workqueue description */
+		struct {
+			/** @head: Workqueue Head */
+			u32 head;
+			/** @tail: Workqueue Tail */
+			u32 tail;
+			/** @status: Workqueue Status */
+			u32 status;
+		} wq_desc;
+		/** @wq: Workqueue Items */
+		u32 wq[WQ_SIZE / sizeof(u32)];
+	} parallel;
+
+	/** @pending_list_size: Size of the pending list snapshot array */
+	int pending_list_size;
+	/** @pending_list: snapshot of the pending list info */
+	struct pending_list_snapshot *pending_list;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
new file mode 100644
index 000000000000..cd80802e8918
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_TYPES_H_
+#define _XE_GUC_TYPES_H_
+
+#include <linux/idr.h>
+#include <linux/xarray.h>
+
+#include "regs/xe_reg_defs.h"
+#include "xe_guc_ads_types.h"
+#include "xe_guc_ct_types.h"
+#include "xe_guc_fwif.h"
+#include "xe_guc_log_types.h"
+#include "xe_guc_pc_types.h"
+#include "xe_uc_fw_types.h"
+
+/**
+ * struct xe_guc - Graphic micro controller
+ */
+struct xe_guc {
+	/** @fw: Generic uC firmware management */
+	struct xe_uc_fw fw;
+	/** @log: GuC log */
+	struct xe_guc_log log;
+	/** @ads: GuC ads */
+	struct xe_guc_ads ads;
+	/** @ct: GuC ct */
+	struct xe_guc_ct ct;
+	/** @pc: GuC Power Conservation */
+	struct xe_guc_pc pc;
+	/** @submission_state: GuC submission state */
+	struct {
+		/** @exec_queue_lookup: Lookup an xe_engine from guc_id */
+		struct xarray exec_queue_lookup;
+		/** @guc_ids: used to allocate new guc_ids, single-lrc */
+		struct ida guc_ids;
+		/** @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */
+		unsigned long *guc_ids_bitmap;
+		/** @stopped: submissions are stopped */
+		atomic_t stopped;
+		/** @lock: protects submission state */
+		struct mutex lock;
+		/** @suspend: suspend fence state */
+		struct {
+			/** @lock: suspend fences lock */
+			spinlock_t lock;
+			/** @context: suspend fences context */
+			u64 context;
+			/** @seqno: suspend fences seqno */
+			u32 seqno;
+		} suspend;
+#ifdef CONFIG_PROVE_LOCKING
+#define NUM_SUBMIT_WQ	256
+		/** @submit_wq_pool: submission ordered workqueues pool */
+		struct workqueue_struct *submit_wq_pool[NUM_SUBMIT_WQ];
+		/** @submit_wq_idx: submission ordered workqueue index */
+		int submit_wq_idx;
+#endif
+		/** @enabled: submission is enabled */
+		bool enabled;
+	} submission_state;
+	/** @hwconfig: Hardware config state */
+	struct {
+		/** @bo: buffer object of the hardware config */
+		struct xe_bo *bo;
+		/** @size: size of the hardware config */
+		u32 size;
+	} hwconfig;
+
+	/**
+	 * @notify_reg: Register which is written to notify GuC of H2G messages
+	 */
+	struct xe_reg notify_reg;
+	/** @params: Control params for fw initialization */
+	u32 params[GUC_CTL_MAX_DWORDS];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
new file mode 100644
index 000000000000..bfdd33b9b23b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright(c) 2023, Intel Corporation. All rights reserved.
+ */
+
+#include <linux/irq.h>
+#include <linux/mei_aux.h>
+#include <linux/pci.h>
+#include <linux/sizes.h>
+
+#include "xe_device_types.h"
+#include "xe_drv.h"
+#include "xe_heci_gsc.h"
+#include "xe_platform_types.h"
+
+#define GSC_BAR_LENGTH  0x00000FFC
+
+#define DG1_GSC_HECI2_BASE			0x259000
+#define PVC_GSC_HECI2_BASE			0x285000
+#define DG2_GSC_HECI2_BASE			0x374000
+
+static void heci_gsc_irq_mask(struct irq_data *d)
+{
+	/* generic irq handling */
+}
+
+static void heci_gsc_irq_unmask(struct irq_data *d)
+{
+	/* generic irq handling */
+}
+
+static struct irq_chip heci_gsc_irq_chip = {
+	.name = "gsc_irq_chip",
+	.irq_mask = heci_gsc_irq_mask,
+	.irq_unmask = heci_gsc_irq_unmask,
+};
+
+static int heci_gsc_irq_init(int irq)
+{
+	irq_set_chip_and_handler_name(irq, &heci_gsc_irq_chip,
+				      handle_simple_irq, "heci_gsc_irq_handler");
+
+	return irq_set_chip_data(irq, NULL);
+}
+
+/**
+ * struct heci_gsc_def - graphics security controller heci interface definitions
+ *
+ * @name: name of the heci device
+ * @bar: address of the mmio bar
+ * @bar_size: size of the mmio bar
+ * @use_polling: indication of using polling mode for the device
+ * @slow_firmware: indication of whether the device is slow (needs longer timeouts)
+ */
+struct heci_gsc_def {
+	const char *name;
+	unsigned long bar;
+	size_t bar_size;
+	bool use_polling;
+	bool slow_firmware;
+};
+
+/* gsc resources and definitions */
+static const struct heci_gsc_def heci_gsc_def_dg1 = {
+	.name = "mei-gscfi",
+	.bar = DG1_GSC_HECI2_BASE,
+	.bar_size = GSC_BAR_LENGTH,
+};
+
+static const struct heci_gsc_def heci_gsc_def_dg2 = {
+	.name = "mei-gscfi",
+	.bar = DG2_GSC_HECI2_BASE,
+	.bar_size = GSC_BAR_LENGTH,
+};
+
+static const struct heci_gsc_def heci_gsc_def_pvc = {
+	.name = "mei-gscfi",
+	.bar = PVC_GSC_HECI2_BASE,
+	.bar_size = GSC_BAR_LENGTH,
+	.slow_firmware = true,
+};
+
+static void heci_gsc_release_dev(struct device *dev)
+{
+	struct auxiliary_device *aux_dev = to_auxiliary_dev(dev);
+	struct mei_aux_device *adev = auxiliary_dev_to_mei_aux_dev(aux_dev);
+
+	kfree(adev);
+}
+
+void xe_heci_gsc_fini(struct xe_device *xe)
+{
+	struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
+
+	if (!HAS_HECI_GSCFI(xe))
+		return;
+
+	if (heci_gsc->adev) {
+		struct auxiliary_device *aux_dev = &heci_gsc->adev->aux_dev;
+
+		auxiliary_device_delete(aux_dev);
+		auxiliary_device_uninit(aux_dev);
+		heci_gsc->adev = NULL;
+	}
+
+	if (heci_gsc->irq >= 0)
+		irq_free_desc(heci_gsc->irq);
+	heci_gsc->irq = -1;
+}
+
+static int heci_gsc_irq_setup(struct xe_device *xe)
+{
+	struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
+	int ret;
+
+	heci_gsc->irq = irq_alloc_desc(0);
+	if (heci_gsc->irq < 0) {
+		drm_err(&xe->drm, "gsc irq error %d\n", heci_gsc->irq);
+		return heci_gsc->irq;
+	}
+
+	ret = heci_gsc_irq_init(heci_gsc->irq);
+	if (ret < 0)
+		drm_err(&xe->drm, "gsc irq init failed %d\n", ret);
+
+	return ret;
+}
+
+static int heci_gsc_add_device(struct xe_device *xe, const struct heci_gsc_def *def)
+{
+	struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct auxiliary_device *aux_dev;
+	struct mei_aux_device *adev;
+	int ret;
+
+	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+	if (!adev)
+		return -ENOMEM;
+	adev->irq = heci_gsc->irq;
+	adev->bar.parent = &pdev->resource[0];
+	adev->bar.start = def->bar + pdev->resource[0].start;
+	adev->bar.end = adev->bar.start + def->bar_size - 1;
+	adev->bar.flags = IORESOURCE_MEM;
+	adev->bar.desc = IORES_DESC_NONE;
+	adev->slow_firmware = def->slow_firmware;
+
+	aux_dev = &adev->aux_dev;
+	aux_dev->name = def->name;
+	aux_dev->id = (pci_domain_nr(pdev->bus) << 16) |
+		      PCI_DEVID(pdev->bus->number, pdev->devfn);
+	aux_dev->dev.parent = &pdev->dev;
+	aux_dev->dev.release = heci_gsc_release_dev;
+
+	ret = auxiliary_device_init(aux_dev);
+	if (ret < 0) {
+		drm_err(&xe->drm, "gsc aux init failed %d\n", ret);
+		kfree(adev);
+		return ret;
+	}
+
+	heci_gsc->adev = adev; /* needed by the notifier */
+	ret = auxiliary_device_add(aux_dev);
+	if (ret < 0) {
+		drm_err(&xe->drm, "gsc aux add failed %d\n", ret);
+		heci_gsc->adev = NULL;
+
+		/* adev will be freed with the put_device() and .release sequence */
+		auxiliary_device_uninit(aux_dev);
+	}
+	return ret;
+}
+
+void xe_heci_gsc_init(struct xe_device *xe)
+{
+	struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
+	const struct heci_gsc_def *def;
+	int ret;
+
+	if (!HAS_HECI_GSCFI(xe))
+		return;
+
+	heci_gsc->irq = -1;
+
+	if (xe->info.platform == XE_PVC) {
+		def = &heci_gsc_def_pvc;
+	} else if (xe->info.platform == XE_DG2) {
+		def = &heci_gsc_def_dg2;
+	} else if (xe->info.platform == XE_DG1) {
+		def = &heci_gsc_def_dg1;
+	} else {
+		drm_warn_once(&xe->drm, "Unknown platform\n");
+		return;
+	}
+
+	if (!def->name) {
+		drm_warn_once(&xe->drm, "HECI is not implemented!\n");
+		return;
+	}
+
+	if (!def->use_polling) {
+		ret = heci_gsc_irq_setup(xe);
+		if (ret)
+			goto fail;
+	}
+
+	ret = heci_gsc_add_device(xe, def);
+	if (ret)
+		goto fail;
+
+	return;
+fail:
+	xe_heci_gsc_fini(xe);
+}
+
+void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir)
+{
+	int ret;
+
+	if ((iir & GSC_IRQ_INTF(1)) == 0)
+		return;
+
+	if (!HAS_HECI_GSCFI(xe)) {
+		drm_warn_once(&xe->drm, "GSC irq: not supported");
+		return;
+	}
+
+	if (xe->heci_gsc.irq < 0)
+		return;
+
+	ret = generic_handle_irq(xe->heci_gsc.irq);
+	if (ret)
+		drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret);
+}
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.h b/drivers/gpu/drm/xe/xe_heci_gsc.h
new file mode 100644
index 000000000000..9db454478fae
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright(c) 2023, Intel Corporation. All rights reserved.
+ */
+#ifndef __XE_HECI_GSC_DEV_H__
+#define __XE_HECI_GSC_DEV_H__
+
+#include <linux/types.h>
+
+struct xe_device;
+struct mei_aux_device;
+
+/*
+ * The HECI1 bit corresponds to bit15 and HECI2 to bit14.
+ * The reason for this is to allow growth for more interfaces in the future.
+ */
+#define GSC_IRQ_INTF(_x)  BIT(15 - (_x))
+
+/**
+ * struct xe_heci_gsc - graphics security controller for xe, HECI interface
+ *
+ * @adev : pointer to mei auxiliary device structure
+ * @irq : irq number
+ *
+ */
+struct xe_heci_gsc {
+	struct mei_aux_device *adev;
+	int irq;
+};
+
+void xe_heci_gsc_init(struct xe_device *xe);
+void xe_heci_gsc_fini(struct xe_device *xe);
+void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir);
+
+#endif /* __XE_HECI_GSC_DEV_H__ */
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
new file mode 100644
index 000000000000..eca109791c6a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_huc.h"
+
+#include <drm/drm_managed.h>
+
+#include "abi/gsc_pxp_commands_abi.h"
+#include "regs/xe_gsc_regs.h"
+#include "regs/xe_guc_regs.h"
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gsc_submit.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+
+static struct xe_gt *
+huc_to_gt(struct xe_huc *huc)
+{
+	return container_of(huc, struct xe_gt, uc.huc);
+}
+
+static struct xe_device *
+huc_to_xe(struct xe_huc *huc)
+{
+	return gt_to_xe(huc_to_gt(huc));
+}
+
+static struct xe_guc *
+huc_to_guc(struct xe_huc *huc)
+{
+	return &container_of(huc, struct xe_uc, huc)->guc;
+}
+
+static void free_gsc_pkt(struct drm_device *drm, void *arg)
+{
+	struct xe_huc *huc = arg;
+
+	xe_bo_unpin_map_no_vm(huc->gsc_pkt);
+	huc->gsc_pkt = NULL;
+}
+
+#define PXP43_HUC_AUTH_INOUT_SIZE SZ_4K
+static int huc_alloc_gsc_pkt(struct xe_huc *huc)
+{
+	struct xe_gt *gt = huc_to_gt(huc);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *bo;
+	int err;
+
+	/* we use a single object for both input and output */
+	bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL,
+				  PXP43_HUC_AUTH_INOUT_SIZE * 2,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_SYSTEM_BIT |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	huc->gsc_pkt = bo;
+
+	err = drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc);
+	if (err) {
+		free_gsc_pkt(&xe->drm, huc);
+		return err;
+	}
+
+	return 0;
+}
+
+int xe_huc_init(struct xe_huc *huc)
+{
+	struct xe_gt *gt = huc_to_gt(huc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	int ret;
+
+	huc->fw.type = XE_UC_FW_TYPE_HUC;
+
+	/* On platforms with a media GT the HuC is only available there */
+	if (tile->media_gt && (gt != tile->media_gt)) {
+		xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
+		return 0;
+	}
+
+	ret = xe_uc_fw_init(&huc->fw);
+	if (ret)
+		goto out;
+
+	if (!xe_uc_fw_is_enabled(&huc->fw))
+		return 0;
+
+	if (huc->fw.has_gsc_headers) {
+		ret = huc_alloc_gsc_pkt(huc);
+		if (ret)
+			goto out;
+	}
+
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
+
+	return 0;
+
+out:
+	drm_err(&xe->drm, "HuC init failed with %d", ret);
+	return ret;
+}
+
+int xe_huc_upload(struct xe_huc *huc)
+{
+	if (!xe_uc_fw_is_loadable(&huc->fw))
+		return 0;
+	return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
+}
+
+#define huc_auth_msg_wr(xe_, map_, offset_, field_, val_) \
+	xe_map_wr_field(xe_, map_, offset_, struct pxp43_new_huc_auth_in, field_, val_)
+#define huc_auth_msg_rd(xe_, map_, offset_, field_) \
+	xe_map_rd_field(xe_, map_, offset_, struct pxp43_huc_auth_out, field_)
+
+static u32 huc_emit_pxp_auth_msg(struct xe_device *xe, struct iosys_map *map,
+				 u32 wr_offset, u32 huc_offset, u32 huc_size)
+{
+	xe_map_memset(xe, map, wr_offset, 0, sizeof(struct pxp43_new_huc_auth_in));
+
+	huc_auth_msg_wr(xe, map, wr_offset, header.api_version, PXP_APIVER(4, 3));
+	huc_auth_msg_wr(xe, map, wr_offset, header.command_id, PXP43_CMDID_NEW_HUC_AUTH);
+	huc_auth_msg_wr(xe, map, wr_offset, header.status, 0);
+	huc_auth_msg_wr(xe, map, wr_offset, header.buffer_len,
+			sizeof(struct pxp43_new_huc_auth_in) - sizeof(struct pxp_cmd_header));
+	huc_auth_msg_wr(xe, map, wr_offset, huc_base_address, huc_offset);
+	huc_auth_msg_wr(xe, map, wr_offset, huc_size, huc_size);
+
+	return wr_offset + sizeof(struct pxp43_new_huc_auth_in);
+}
+
+static int huc_auth_via_gsccs(struct xe_huc *huc)
+{
+	struct xe_gt *gt = huc_to_gt(huc);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *pkt = huc->gsc_pkt;
+	u32 wr_offset;
+	u32 rd_offset;
+	u64 ggtt_offset;
+	u32 out_status;
+	int retry = 5;
+	int err = 0;
+
+	if (!pkt)
+		return -ENODEV;
+
+	ggtt_offset = xe_bo_ggtt_addr(pkt);
+
+	wr_offset = xe_gsc_emit_header(xe, &pkt->vmap, 0, HECI_MEADDRESS_PXP, 0,
+				       sizeof(struct pxp43_new_huc_auth_in));
+	wr_offset = huc_emit_pxp_auth_msg(xe, &pkt->vmap, wr_offset,
+					  xe_bo_ggtt_addr(huc->fw.bo),
+					  huc->fw.bo->size);
+	do {
+		err = xe_gsc_pkt_submit_kernel(&gt->uc.gsc, ggtt_offset, wr_offset,
+					       ggtt_offset + PXP43_HUC_AUTH_INOUT_SIZE,
+					       PXP43_HUC_AUTH_INOUT_SIZE);
+		if (err)
+			break;
+
+		if (xe_gsc_check_and_update_pending(xe, &pkt->vmap, 0, &pkt->vmap,
+						    PXP43_HUC_AUTH_INOUT_SIZE)) {
+			err = -EBUSY;
+			msleep(50);
+		}
+	} while (--retry && err == -EBUSY);
+
+	if (err) {
+		drm_err(&xe->drm, "failed to submit GSC request to auth: %d\n", err);
+		return err;
+	}
+
+	err = xe_gsc_read_out_header(xe, &pkt->vmap, PXP43_HUC_AUTH_INOUT_SIZE,
+				     sizeof(struct pxp43_huc_auth_out), &rd_offset);
+	if (err) {
+		drm_err(&xe->drm, "HuC: invalid GSC reply for auth (err=%d)\n", err);
+		return err;
+	}
+
+	/*
+	 * The GSC will return PXP_STATUS_OP_NOT_PERMITTED if the HuC is already
+	 * authenticated. If the same error is ever returned with HuC not loaded
+	 * we'll still catch it when we check the authentication bit later.
+	 */
+	out_status = huc_auth_msg_rd(xe, &pkt->vmap, rd_offset, header.status);
+	if (out_status != PXP_STATUS_SUCCESS && out_status != PXP_STATUS_OP_NOT_PERMITTED) {
+		drm_err(&xe->drm, "auth failed with GSC error = 0x%x\n", out_status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static const struct {
+	const char *name;
+	struct xe_reg reg;
+	u32 val;
+} huc_auth_modes[XE_HUC_AUTH_TYPES_COUNT] = {
+	[XE_HUC_AUTH_VIA_GUC] = { "GuC",
+				  HUC_KERNEL_LOAD_INFO,
+				  HUC_LOAD_SUCCESSFUL },
+	[XE_HUC_AUTH_VIA_GSC] = { "GSC",
+				  HECI_FWSTS5(MTL_GSC_HECI1_BASE),
+				  HECI1_FWSTS5_HUC_AUTH_DONE },
+};
+
+bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type)
+{
+	struct xe_gt *gt = huc_to_gt(huc);
+
+	return xe_mmio_read32(gt, huc_auth_modes[type].reg) & huc_auth_modes[type].val;
+}
+
+int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type)
+{
+	struct xe_device *xe = huc_to_xe(huc);
+	struct xe_gt *gt = huc_to_gt(huc);
+	struct xe_guc *guc = huc_to_guc(huc);
+	int ret;
+
+	if (!xe_uc_fw_is_loadable(&huc->fw))
+		return 0;
+
+	/* On newer platforms the HuC survives reset, so no need to re-auth */
+	if (xe_huc_is_authenticated(huc, type)) {
+		xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING);
+		return 0;
+	}
+
+	if (!xe_uc_fw_is_loaded(&huc->fw))
+		return -ENOEXEC;
+
+	switch (type) {
+	case XE_HUC_AUTH_VIA_GUC:
+		ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) +
+				      xe_uc_fw_rsa_offset(&huc->fw));
+		break;
+	case XE_HUC_AUTH_VIA_GSC:
+		ret = huc_auth_via_gsccs(huc);
+		break;
+	default:
+		XE_WARN_ON(type);
+		return -EINVAL;
+	}
+	if (ret) {
+		drm_err(&xe->drm, "Failed to trigger HuC auth via %s: %d\n",
+			huc_auth_modes[type].name, ret);
+		goto fail;
+	}
+
+	ret = xe_mmio_wait32(gt, huc_auth_modes[type].reg, huc_auth_modes[type].val,
+			     huc_auth_modes[type].val, 100000, NULL, false);
+	if (ret) {
+		drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret);
+		goto fail;
+	}
+
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING);
+	drm_dbg(&xe->drm, "HuC authenticated via %s\n", huc_auth_modes[type].name);
+
+	return 0;
+
+fail:
+	drm_err(&xe->drm, "HuC: Auth via %s failed: %d\n",
+		huc_auth_modes[type].name, ret);
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+
+	return ret;
+}
+
+void xe_huc_sanitize(struct xe_huc *huc)
+{
+	if (!xe_uc_fw_is_loadable(&huc->fw))
+		return;
+	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
+}
+
+void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
+{
+	struct xe_gt *gt = huc_to_gt(huc);
+	int err;
+
+	xe_uc_fw_print(&huc->fw, p);
+
+	if (!xe_uc_fw_is_enabled(&huc->fw))
+		return;
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return;
+
+	drm_printf(p, "\nHuC status: 0x%08x\n",
+		   xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO));
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h
new file mode 100644
index 000000000000..532017230287
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_H_
+#define _XE_HUC_H_
+
+#include "xe_huc_types.h"
+
+struct drm_printer;
+
+enum xe_huc_auth_types {
+	XE_HUC_AUTH_VIA_GUC = 0,
+	XE_HUC_AUTH_VIA_GSC,
+	XE_HUC_AUTH_TYPES_COUNT
+};
+
+int xe_huc_init(struct xe_huc *huc);
+int xe_huc_upload(struct xe_huc *huc);
+int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type);
+bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type);
+void xe_huc_sanitize(struct xe_huc *huc);
+void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c
new file mode 100644
index 000000000000..18585a7eeb9d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_huc_debugfs.h"
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_huc.h"
+#include "xe_macros.h"
+
+static struct xe_gt *
+huc_to_gt(struct xe_huc *huc)
+{
+	return container_of(huc, struct xe_gt, uc.huc);
+}
+
+static struct xe_device *
+huc_to_xe(struct xe_huc *huc)
+{
+	return gt_to_xe(huc_to_gt(huc));
+}
+
+static struct xe_huc *node_to_huc(struct drm_info_node *node)
+{
+	return node->info_ent->data;
+}
+
+static int huc_info(struct seq_file *m, void *data)
+{
+	struct xe_huc *huc = node_to_huc(m->private);
+	struct xe_device *xe = huc_to_xe(huc);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_device_mem_access_get(xe);
+	xe_huc_print_info(huc, &p);
+	xe_device_mem_access_put(xe);
+
+	return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+	{"huc_info", huc_info, 0},
+};
+
+void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent)
+{
+	struct drm_minor *minor = huc_to_xe(huc)->drm.primary;
+	struct drm_info_list *local;
+	int i;
+
+#define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
+	local = drmm_kmalloc(&huc_to_xe(huc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
+	if (!local)
+		return;
+
+	memcpy(local, debugfs_list, DEBUGFS_SIZE);
+#undef DEBUGFS_SIZE
+
+	for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+		local[i].data = huc;
+
+	drm_debugfs_create_files(local,
+				 ARRAY_SIZE(debugfs_list),
+				 parent, minor);
+}
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.h b/drivers/gpu/drm/xe/xe_huc_debugfs.h
new file mode 100644
index 000000000000..ec58f1818804
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_DEBUGFS_H_
+#define _XE_HUC_DEBUGFS_H_
+
+struct dentry;
+struct xe_huc;
+
+void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc_types.h b/drivers/gpu/drm/xe/xe_huc_types.h
new file mode 100644
index 000000000000..cfbaa5e0dfca
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_types.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_TYPES_H_
+#define _XE_HUC_TYPES_H_
+
+#include "xe_uc_fw_types.h"
+
+struct xe_bo;
+
+/**
+ * struct xe_huc - HuC
+ */
+struct xe_huc {
+	/** @fw: Generic uC firmware management */
+	struct xe_uc_fw fw;
+
+	/** @gsc_pkt: bo to store the packet for auth via GSC */
+	struct xe_bo *gsc_pkt;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
new file mode 100644
index 000000000000..1fa5cf5eea97
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -0,0 +1,883 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_hw_engine.h"
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_execlist.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_ccs_mode.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_fence.h"
+#include "xe_irq.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_reg_sr.h"
+#include "xe_rtp.h"
+#include "xe_sched_job.h"
+#include "xe_tuning.h"
+#include "xe_uc_fw.h"
+#include "xe_wa.h"
+
+#define MAX_MMIO_BASES 3
+struct engine_info {
+	const char *name;
+	unsigned int class : 8;
+	unsigned int instance : 8;
+	enum xe_force_wake_domains domain;
+	u32 mmio_base;
+};
+
+static const struct engine_info engine_infos[] = {
+	[XE_HW_ENGINE_RCS0] = {
+		.name = "rcs0",
+		.class = XE_ENGINE_CLASS_RENDER,
+		.instance = 0,
+		.domain = XE_FW_RENDER,
+		.mmio_base = RENDER_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS0] = {
+		.name = "bcs0",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 0,
+		.domain = XE_FW_RENDER,
+		.mmio_base = BLT_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS1] = {
+		.name = "bcs1",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 1,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS1_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS2] = {
+		.name = "bcs2",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 2,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS2_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS3] = {
+		.name = "bcs3",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 3,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS3_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS4] = {
+		.name = "bcs4",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 4,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS4_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS5] = {
+		.name = "bcs5",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 5,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS5_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS6] = {
+		.name = "bcs6",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 6,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS6_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS7] = {
+		.name = "bcs7",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 7,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS7_RING_BASE,
+	},
+	[XE_HW_ENGINE_BCS8] = {
+		.name = "bcs8",
+		.class = XE_ENGINE_CLASS_COPY,
+		.instance = 8,
+		.domain = XE_FW_RENDER,
+		.mmio_base = XEHPC_BCS8_RING_BASE,
+	},
+
+	[XE_HW_ENGINE_VCS0] = {
+		.name = "vcs0",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 0,
+		.domain = XE_FW_MEDIA_VDBOX0,
+		.mmio_base = BSD_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS1] = {
+		.name = "vcs1",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 1,
+		.domain = XE_FW_MEDIA_VDBOX1,
+		.mmio_base = BSD2_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS2] = {
+		.name = "vcs2",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 2,
+		.domain = XE_FW_MEDIA_VDBOX2,
+		.mmio_base = BSD3_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS3] = {
+		.name = "vcs3",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 3,
+		.domain = XE_FW_MEDIA_VDBOX3,
+		.mmio_base = BSD4_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS4] = {
+		.name = "vcs4",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 4,
+		.domain = XE_FW_MEDIA_VDBOX4,
+		.mmio_base = XEHP_BSD5_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS5] = {
+		.name = "vcs5",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 5,
+		.domain = XE_FW_MEDIA_VDBOX5,
+		.mmio_base = XEHP_BSD6_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS6] = {
+		.name = "vcs6",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 6,
+		.domain = XE_FW_MEDIA_VDBOX6,
+		.mmio_base = XEHP_BSD7_RING_BASE,
+	},
+	[XE_HW_ENGINE_VCS7] = {
+		.name = "vcs7",
+		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
+		.instance = 7,
+		.domain = XE_FW_MEDIA_VDBOX7,
+		.mmio_base = XEHP_BSD8_RING_BASE,
+	},
+	[XE_HW_ENGINE_VECS0] = {
+		.name = "vecs0",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 0,
+		.domain = XE_FW_MEDIA_VEBOX0,
+		.mmio_base = VEBOX_RING_BASE,
+	},
+	[XE_HW_ENGINE_VECS1] = {
+		.name = "vecs1",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 1,
+		.domain = XE_FW_MEDIA_VEBOX1,
+		.mmio_base = VEBOX2_RING_BASE,
+	},
+	[XE_HW_ENGINE_VECS2] = {
+		.name = "vecs2",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 2,
+		.domain = XE_FW_MEDIA_VEBOX2,
+		.mmio_base = XEHP_VEBOX3_RING_BASE,
+	},
+	[XE_HW_ENGINE_VECS3] = {
+		.name = "vecs3",
+		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+		.instance = 3,
+		.domain = XE_FW_MEDIA_VEBOX3,
+		.mmio_base = XEHP_VEBOX4_RING_BASE,
+	},
+	[XE_HW_ENGINE_CCS0] = {
+		.name = "ccs0",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 0,
+		.domain = XE_FW_RENDER,
+		.mmio_base = COMPUTE0_RING_BASE,
+	},
+	[XE_HW_ENGINE_CCS1] = {
+		.name = "ccs1",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 1,
+		.domain = XE_FW_RENDER,
+		.mmio_base = COMPUTE1_RING_BASE,
+	},
+	[XE_HW_ENGINE_CCS2] = {
+		.name = "ccs2",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 2,
+		.domain = XE_FW_RENDER,
+		.mmio_base = COMPUTE2_RING_BASE,
+	},
+	[XE_HW_ENGINE_CCS3] = {
+		.name = "ccs3",
+		.class = XE_ENGINE_CLASS_COMPUTE,
+		.instance = 3,
+		.domain = XE_FW_RENDER,
+		.mmio_base = COMPUTE3_RING_BASE,
+	},
+	[XE_HW_ENGINE_GSCCS0] = {
+		.name = "gsccs0",
+		.class = XE_ENGINE_CLASS_OTHER,
+		.instance = OTHER_GSC_INSTANCE,
+		.domain = XE_FW_GSC,
+		.mmio_base = GSCCS_RING_BASE,
+	},
+};
+
+static void hw_engine_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_hw_engine *hwe = arg;
+
+	if (hwe->exl_port)
+		xe_execlist_port_destroy(hwe->exl_port);
+	xe_lrc_finish(&hwe->kernel_lrc);
+
+	hwe->gt = NULL;
+}
+
+static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
+				   u32 val)
+{
+	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
+	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
+
+	reg.addr += hwe->mmio_base;
+
+	xe_mmio_write32(hwe->gt, reg, val);
+}
+
+static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
+{
+	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
+	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
+
+	reg.addr += hwe->mmio_base;
+
+	return xe_mmio_read32(hwe->gt, reg);
+}
+
+void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
+{
+	u32 ccs_mask =
+		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
+
+	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
+		xe_mmio_write32(hwe->gt, RCU_MODE,
+				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
+
+	hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
+	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
+			       xe_bo_ggtt_addr(hwe->hwsp));
+	hw_engine_mmio_write32(hwe, RING_MODE(0),
+			       _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
+	hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
+			       _MASKED_BIT_DISABLE(STOP_RING));
+	hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
+}
+
+static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
+						 const struct xe_hw_engine *hwe)
+{
+	return xe_gt_ccs_mode_enabled(gt) &&
+	       xe_rtp_match_first_render_or_compute(gt, hwe);
+}
+
+void
+xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	const u8 mocs_write_idx = gt->mocs.uc_index;
+	const u8 mocs_read_idx = gt->mocs.uc_index;
+	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
+			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+	const struct xe_rtp_entry_sr lrc_was[] = {
+		/*
+		 * Some blitter commands do not have a field for MOCS, those
+		 * commands will use MOCS index pointed by BLIT_CCTL.
+		 * BLIT_CCTL registers are needed to be programmed to un-cached.
+		 */
+		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
+		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
+			       ENGINE_CLASS(COPY)),
+		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
+				 BLIT_CCTL_DST_MOCS_MASK |
+				 BLIT_CCTL_SRC_MOCS_MASK,
+				 blit_cctl_val,
+				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+		},
+		/* Use Fixed slice CCS mode */
+		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
+		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
+		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
+					   RCU_MODE_FIXED_SLICE_CCS_MODE))
+		},
+		{}
+	};
+
+	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
+}
+
+static void
+hw_engine_setup_default_state(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	/*
+	 * RING_CMD_CCTL specifies the default MOCS entry that will be
+	 * used by the command streamer when executing commands that
+	 * don't have a way to explicitly specify a MOCS setting.
+	 * The default should usually reference whichever MOCS entry
+	 * corresponds to uncached behavior, although use of a WB cached
+	 * entry is recommended by the spec in certain circumstances on
+	 * specific platforms.
+	 * Bspec: 72161
+	 */
+	const u8 mocs_write_idx = gt->mocs.uc_index;
+	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
+				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
+				 gt->mocs.wb_index : gt->mocs.uc_index;
+	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
+				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+	const struct xe_rtp_entry_sr engine_entries[] = {
+		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
+		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
+		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
+					   CMD_CCTL_WRITE_OVERRIDE_MASK |
+					   CMD_CCTL_READ_OVERRIDE_MASK,
+					   ring_cmd_cctl_val,
+					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+		},
+		/*
+		 * To allow the GSC engine to go idle on MTL we need to enable
+		 * idle messaging and set the hysteresis value (we use 0xA=5us
+		 * as recommended in spec). On platforms after MTL this is
+		 * enabled by default.
+		 */
+		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
+		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
+		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
+				     IDLE_MSG_DISABLE,
+				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
+				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
+					   IDLE_WAIT_TIME,
+					   0xA,
+					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+		},
+		{}
+	};
+
+	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
+}
+
+static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
+				 enum xe_hw_engine_id id)
+{
+	const struct engine_info *info;
+
+	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
+		return;
+
+	if (!(gt->info.engine_mask & BIT(id)))
+		return;
+
+	info = &engine_infos[id];
+
+	xe_gt_assert(gt, !hwe->gt);
+
+	hwe->gt = gt;
+	hwe->class = info->class;
+	hwe->instance = info->instance;
+	hwe->mmio_base = info->mmio_base;
+	hwe->domain = info->domain;
+	hwe->name = info->name;
+	hwe->fence_irq = &gt->fence_irq[info->class];
+	hwe->engine_id = id;
+
+	hwe->eclass = &gt->eclass[hwe->class];
+	if (!hwe->eclass->sched_props.job_timeout_ms) {
+		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
+		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
+		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
+		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
+		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
+		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
+		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
+		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
+		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
+		/* Record default props */
+		hwe->eclass->defaults = hwe->eclass->sched_props;
+	}
+
+	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
+	xe_tuning_process_engine(hwe);
+	xe_wa_process_engine(hwe);
+	hw_engine_setup_default_state(hwe);
+
+	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
+	xe_reg_whitelist_process_engine(hwe);
+}
+
+static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
+			  enum xe_hw_engine_id id)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
+	int err;
+
+	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
+	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
+
+	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
+	xe_reg_sr_apply_whitelist(hwe);
+
+	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
+						 XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+						 XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(hwe->hwsp)) {
+		err = PTR_ERR(hwe->hwsp);
+		goto err_name;
+	}
+
+	err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K);
+	if (err)
+		goto err_hwsp;
+
+	if (!xe_device_uc_enabled(xe)) {
+		hwe->exl_port = xe_execlist_port_create(xe, hwe);
+		if (IS_ERR(hwe->exl_port)) {
+			err = PTR_ERR(hwe->exl_port);
+			goto err_kernel_lrc;
+		}
+	}
+
+	if (xe_device_uc_enabled(xe))
+		xe_hw_engine_enable_ring(hwe);
+
+	/* We reserve the highest BCS instance for USM */
+	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
+		gt->usm.reserved_bcs_instance = hwe->instance;
+
+	err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
+	if (err)
+		return err;
+
+	return 0;
+
+err_kernel_lrc:
+	xe_lrc_finish(&hwe->kernel_lrc);
+err_hwsp:
+	xe_bo_unpin_map_no_vm(hwe->hwsp);
+err_name:
+	hwe->name = NULL;
+
+	return err;
+}
+
+static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
+{
+	int class;
+
+	/* FIXME: Doing a simple logical mapping that works for most hardware */
+	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+		struct xe_hw_engine *hwe;
+		enum xe_hw_engine_id id;
+		int logical_instance = 0;
+
+		for_each_hw_engine(hwe, gt, id)
+			if (hwe->class == class)
+				hwe->logical_instance = logical_instance++;
+	}
+}
+
+static void read_media_fuses(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 media_fuse;
+	u16 vdbox_mask;
+	u16 vebox_mask;
+	int i, j;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);
+
+	/*
+	 * Pre-Xe_HP platforms had register bits representing absent engines,
+	 * whereas Xe_HP and beyond have bits representing present engines.
+	 * Invert the polarity on old platforms so that we can use common
+	 * handling below.
+	 */
+	if (GRAPHICS_VERx100(xe) < 1250)
+		media_fuse = ~media_fuse;
+
+	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
+	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);
+
+	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		if (!(BIT(j) & vdbox_mask)) {
+			gt->info.engine_mask &= ~BIT(i);
+			drm_info(&xe->drm, "vcs%u fused off\n", j);
+		}
+	}
+
+	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		if (!(BIT(j) & vebox_mask)) {
+			gt->info.engine_mask &= ~BIT(i);
+			drm_info(&xe->drm, "vecs%u fused off\n", j);
+		}
+	}
+}
+
+static void read_copy_fuses(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 bcs_mask;
+
+	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
+		return;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
+	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
+
+	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
+	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		if (!(BIT(j / 2) & bcs_mask)) {
+			gt->info.engine_mask &= ~BIT(i);
+			drm_info(&xe->drm, "bcs%u fused off\n", j);
+		}
+	}
+}
+
+static void read_compute_fuses_from_dss(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	/*
+	 * CCS fusing based on DSS masks only applies to platforms that can
+	 * have more than one CCS.
+	 */
+	if (hweight64(gt->info.engine_mask &
+		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
+		return;
+
+	/*
+	 * CCS availability on Xe_HP is inferred from the presence of DSS in
+	 * each quadrant.
+	 */
+	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
+			gt->info.engine_mask &= ~BIT(i);
+			drm_info(&xe->drm, "ccs%u fused off\n", j);
+		}
+	}
+}
+
+static void read_compute_fuses_from_reg(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 ccs_mask;
+
+	ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
+	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);
+
+	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
+		if (!(gt->info.engine_mask & BIT(i)))
+			continue;
+
+		if ((ccs_mask & BIT(j)) == 0) {
+			gt->info.engine_mask &= ~BIT(i);
+			drm_info(&xe->drm, "ccs%u fused off\n", j);
+		}
+	}
+}
+
+static void read_compute_fuses(struct xe_gt *gt)
+{
+	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
+		read_compute_fuses_from_reg(gt);
+	else
+		read_compute_fuses_from_dss(gt);
+}
+
+static void check_gsc_availability(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
+		return;
+
+	/*
+	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
+	 * have the FW there is nothing we need the engine for and can therefore
+	 * skip its initialization.
+	 */
+	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
+		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);
+		drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
+	}
+}
+
+int xe_hw_engines_init_early(struct xe_gt *gt)
+{
+	int i;
+
+	read_media_fuses(gt);
+	read_copy_fuses(gt);
+	read_compute_fuses(gt);
+	check_gsc_availability(gt);
+
+	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
+	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
+
+	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
+		hw_engine_init_early(gt, &gt->hw_engines[i], i);
+
+	return 0;
+}
+
+int xe_hw_engines_init(struct xe_gt *gt)
+{
+	int err;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id) {
+		err = hw_engine_init(gt, hwe, id);
+		if (err)
+			return err;
+	}
+
+	hw_engine_setup_logical_mapping(gt);
+
+	return 0;
+}
+
+void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
+{
+	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);
+
+	if (hwe->irq_handler)
+		hwe->irq_handler(hwe, intr_vec);
+
+	if (intr_vec & GT_RENDER_USER_INTERRUPT)
+		xe_hw_fence_irq_run(hwe->fence_irq);
+}
+
+/**
+ * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
+ * @hwe: Xe HW Engine.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * Returns: a Xe HW Engine snapshot object that must be freed by the
+ * caller, using `xe_hw_engine_snapshot_free`.
+ */
+struct xe_hw_engine_snapshot *
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
+{
+	struct xe_hw_engine_snapshot *snapshot;
+	int len;
+
+	if (!xe_hw_engine_is_valid(hwe))
+		return NULL;
+
+	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
+
+	if (!snapshot)
+		return NULL;
+
+	len = strlen(hwe->name) + 1;
+	snapshot->name = kzalloc(len, GFP_ATOMIC);
+	if (snapshot->name)
+		strscpy(snapshot->name, hwe->name, len);
+
+	snapshot->class = hwe->class;
+	snapshot->logical_instance = hwe->logical_instance;
+	snapshot->forcewake.domain = hwe->domain;
+	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
+						    hwe->domain);
+	snapshot->mmio_base = hwe->mmio_base;
+
+	snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
+	snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe,
+							   RING_HWS_PGA(0));
+	snapshot->reg.ring_execlist_status_lo =
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
+	snapshot->reg.ring_execlist_status_hi =
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
+	snapshot->reg.ring_execlist_sq_contents_lo =
+		hw_engine_mmio_read32(hwe,
+				      RING_EXECLIST_SQ_CONTENTS_LO(0));
+	snapshot->reg.ring_execlist_sq_contents_hi =
+		hw_engine_mmio_read32(hwe,
+				      RING_EXECLIST_SQ_CONTENTS_HI(0));
+	snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
+	snapshot->reg.ring_head =
+		hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
+	snapshot->reg.ring_tail =
+		hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
+	snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0));
+	snapshot->reg.ring_mi_mode =
+		hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
+	snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0));
+	snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0));
+	snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
+	snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
+	snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
+	snapshot->reg.ring_acthd_udw =
+		hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
+	snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
+	snapshot->reg.ring_bbaddr_udw =
+		hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
+	snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
+	snapshot->reg.ring_dma_fadd_udw =
+		hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
+	snapshot->reg.ring_dma_fadd =
+		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
+	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
+
+	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
+		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
+
+	return snapshot;
+}
+
+/**
+ * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
+ * @snapshot: Xe HW Engine snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given Xe HW Engine snapshot object.
+ */
+void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
+				 struct drm_printer *p)
+{
+	if (!snapshot)
+		return;
+
+	drm_printf(p, "%s (physical), logical instance=%d\n",
+		   snapshot->name ? snapshot->name : "",
+		   snapshot->logical_instance);
+	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
+		   snapshot->forcewake.domain, snapshot->forcewake.ref);
+	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
+	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
+	drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
+		   snapshot->reg.ring_execlist_status_lo);
+	drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
+		   snapshot->reg.ring_execlist_status_hi);
+	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
+		   snapshot->reg.ring_execlist_sq_contents_lo);
+	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
+		   snapshot->reg.ring_execlist_sq_contents_hi);
+	drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start);
+	drm_printf(p, "\tRING_HEAD:  0x%08x\n", snapshot->reg.ring_head);
+	drm_printf(p, "\tRING_TAIL:  0x%08x\n", snapshot->reg.ring_tail);
+	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
+	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
+	drm_printf(p, "\tRING_MODE: 0x%08x\n",
+		   snapshot->reg.ring_mode);
+	drm_printf(p, "\tRING_IMR:   0x%08x\n", snapshot->reg.ring_imr);
+	drm_printf(p, "\tRING_ESR:   0x%08x\n", snapshot->reg.ring_esr);
+	drm_printf(p, "\tRING_EMR:   0x%08x\n", snapshot->reg.ring_emr);
+	drm_printf(p, "\tRING_EIR:   0x%08x\n", snapshot->reg.ring_eir);
+	drm_printf(p, "\tACTHD:  0x%08x_%08x\n", snapshot->reg.ring_acthd_udw,
+		   snapshot->reg.ring_acthd);
+	drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", snapshot->reg.ring_bbaddr_udw,
+		   snapshot->reg.ring_bbaddr);
+	drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
+		   snapshot->reg.ring_dma_fadd_udw,
+		   snapshot->reg.ring_dma_fadd);
+	drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr);
+	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
+		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
+			   snapshot->reg.rcu_mode);
+}
+
+/**
+ * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: Xe HW Engine snapshot object.
+ *
+ * This function free all the memory that needed to be allocated at capture
+ * time.
+ */
+void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
+{
+	if (!snapshot)
+		return;
+
+	kfree(snapshot->name);
+	kfree(snapshot);
+}
+
+/**
+ * xe_hw_engine_print - Xe HW Engine Print.
+ * @hwe: Hardware Engine.
+ * @p: drm_printer.
+ *
+ * This function quickly capture a snapshot and immediately print it out.
+ */
+void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
+{
+	struct xe_hw_engine_snapshot *snapshot;
+
+	snapshot = xe_hw_engine_snapshot_capture(hwe);
+	xe_hw_engine_snapshot_print(snapshot, p);
+	xe_hw_engine_snapshot_free(snapshot);
+}
+
+u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
+				enum xe_engine_class engine_class)
+{
+	u32 mask = 0;
+	enum xe_hw_engine_id id;
+
+	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
+		if (engine_infos[id].class == engine_class &&
+		    gt->info.engine_mask & BIT(id))
+			mask |= BIT(engine_infos[id].instance);
+	}
+	return mask;
+}
+
+bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (hwe->class == XE_ENGINE_CLASS_OTHER)
+		return true;
+
+	/* Check for engines disabled by ccs_mode setting */
+	if (xe_gt_ccs_mode_enabled(gt) &&
+	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
+	    hwe->logical_instance >= gt->ccs_mode)
+		return true;
+
+	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
+		hwe->instance == gt->usm.reserved_bcs_instance;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
new file mode 100644
index 000000000000..71968ee2f600
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_H_
+#define _XE_HW_ENGINE_H_
+
+#include "xe_hw_engine_types.h"
+
+struct drm_printer;
+
+#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN
+#define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN
+#else
+#define XE_HW_ENGINE_JOB_TIMEOUT_MIN 1
+#endif
+#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MAX
+#define XE_HW_ENGINE_JOB_TIMEOUT_MAX CONFIG_DRM_XE_JOB_TIMEOUT_MAX
+#else
+#define XE_HW_ENGINE_JOB_TIMEOUT_MAX (10 * 1000)
+#endif
+#ifdef CONFIG_DRM_XE_TIMESLICE_MIN
+#define XE_HW_ENGINE_TIMESLICE_MIN CONFIG_DRM_XE_TIMESLICE_MIN
+#else
+#define XE_HW_ENGINE_TIMESLICE_MIN 1
+#endif
+#ifdef CONFIG_DRM_XE_TIMESLICE_MAX
+#define XE_HW_ENGINE_TIMESLICE_MAX CONFIG_DRM_XE_TIMESLICE_MAX
+#else
+#define XE_HW_ENGINE_TIMESLICE_MAX (10 * 1000 * 1000)
+#endif
+#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT CONFIG_DRM_XE_PREEMPT_TIMEOUT
+#else
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT (640 * 1000)
+#endif
+#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN
+#else
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN 1
+#endif
+#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT_MAX
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX CONFIG_DRM_XE_PREEMPT_TIMEOUT_MAX
+#else
+#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX (10 * 1000 * 1000)
+#endif
+
+int xe_hw_engines_init_early(struct xe_gt *gt);
+int xe_hw_engines_init(struct xe_gt *gt);
+void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec);
+void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
+u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
+				enum xe_engine_class engine_class);
+
+struct xe_hw_engine_snapshot *
+xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe);
+void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot);
+void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
+				 struct drm_printer *p);
+void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p);
+void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
+
+bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe);
+static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe)
+{
+	return hwe->name;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
new file mode 100644
index 000000000000..e49bc14f0ecf
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -0,0 +1,675 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include "xe_gt.h"
+#include "xe_hw_engine_class_sysfs.h"
+
+#define MAX_ENGINE_CLASS_NAME_LEN    16
+static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
+					   struct kobject *parent);
+
+/**
+ * xe_hw_engine_timeout_in_range - Helper to check if timeout is in range
+ * @timeout: timeout to validate
+ * @min: min value of valid range
+ * @max: max value of valid range
+ *
+ * This helper helps to validate if timeout is in min-max range of HW engine
+ * scheduler.
+ *
+ * Returns: Returns false value for failure and true for success.
+ */
+bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max)
+{
+	return timeout >= min && timeout <= max;
+}
+
+static void kobj_xe_hw_engine_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static const struct kobj_type kobj_xe_hw_engine_type = {
+	.release = kobj_xe_hw_engine_release,
+	.sysfs_ops = &kobj_sysfs_ops
+};
+
+static ssize_t job_timeout_max_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 timeout;
+	int err;
+
+	err = kstrtou32(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	if (timeout < eclass->sched_props.job_timeout_min)
+		return -EINVAL;
+
+	if (!xe_hw_engine_timeout_in_range(timeout,
+					   XE_HW_ENGINE_JOB_TIMEOUT_MIN,
+					   XE_HW_ENGINE_JOB_TIMEOUT_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.job_timeout_max, timeout);
+
+	return count;
+}
+
+static ssize_t job_timeout_max_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_max);
+}
+
+static struct kobj_attribute job_timeout_max_attr =
+__ATTR(job_timeout_max, 0644, job_timeout_max_show, job_timeout_max_store);
+
+static ssize_t job_timeout_min_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 timeout;
+	int err;
+
+	err = kstrtou32(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	if (timeout > eclass->sched_props.job_timeout_max)
+		return -EINVAL;
+
+	if (!xe_hw_engine_timeout_in_range(timeout,
+					   XE_HW_ENGINE_JOB_TIMEOUT_MIN,
+					   XE_HW_ENGINE_JOB_TIMEOUT_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.job_timeout_min, timeout);
+
+	return count;
+}
+
+static ssize_t job_timeout_min_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_min);
+}
+
+static struct kobj_attribute job_timeout_min_attr =
+__ATTR(job_timeout_min, 0644, job_timeout_min_show, job_timeout_min_store);
+
+static ssize_t job_timeout_store(struct kobject *kobj,
+				 struct kobj_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 min = eclass->sched_props.job_timeout_min;
+	u32 max = eclass->sched_props.job_timeout_max;
+	u32 timeout;
+	int err;
+
+	err = kstrtou32(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	if (!xe_hw_engine_timeout_in_range(timeout, min, max))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.job_timeout_ms, timeout);
+
+	return count;
+}
+
+static ssize_t job_timeout_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_ms);
+}
+
+static struct kobj_attribute job_timeout_attr =
+__ATTR(job_timeout_ms, 0644, job_timeout_show, job_timeout_store);
+
+static ssize_t job_timeout_default(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.job_timeout_ms);
+}
+
+static struct kobj_attribute job_timeout_def =
+__ATTR(job_timeout_ms, 0444, job_timeout_default, NULL);
+
+static ssize_t job_timeout_min_default(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.job_timeout_min);
+}
+
+static struct kobj_attribute job_timeout_min_def =
+__ATTR(job_timeout_min, 0444, job_timeout_min_default, NULL);
+
+static ssize_t job_timeout_max_default(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.job_timeout_max);
+}
+
+static struct kobj_attribute job_timeout_max_def =
+__ATTR(job_timeout_max, 0444, job_timeout_max_default, NULL);
+
+static ssize_t timeslice_duration_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 min = eclass->sched_props.timeslice_min;
+	u32 max = eclass->sched_props.timeslice_max;
+	u32 duration;
+	int err;
+
+	err = kstrtou32(buf, 0, &duration);
+	if (err)
+		return err;
+
+	if (!xe_hw_engine_timeout_in_range(duration, min, max))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.timeslice_us, duration);
+
+	return count;
+}
+
+static ssize_t timeslice_duration_max_store(struct kobject *kobj,
+					    struct kobj_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 duration;
+	int err;
+
+	err = kstrtou32(buf, 0, &duration);
+	if (err)
+		return err;
+
+	if (duration < eclass->sched_props.timeslice_min)
+		return -EINVAL;
+
+	if (!xe_hw_engine_timeout_in_range(duration,
+					   XE_HW_ENGINE_TIMESLICE_MIN,
+					   XE_HW_ENGINE_TIMESLICE_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.timeslice_max, duration);
+
+	return count;
+}
+
+static ssize_t timeslice_duration_max_show(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.timeslice_max);
+}
+
+static struct kobj_attribute timeslice_duration_max_attr =
+	__ATTR(timeslice_duration_max, 0644, timeslice_duration_max_show,
+	       timeslice_duration_max_store);
+
+static ssize_t timeslice_duration_min_store(struct kobject *kobj,
+					    struct kobj_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 duration;
+	int err;
+
+	err = kstrtou32(buf, 0, &duration);
+	if (err)
+		return err;
+
+	if (duration > eclass->sched_props.timeslice_max)
+		return -EINVAL;
+
+	if (!xe_hw_engine_timeout_in_range(duration,
+					   XE_HW_ENGINE_TIMESLICE_MIN,
+					   XE_HW_ENGINE_TIMESLICE_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.timeslice_min, duration);
+
+	return count;
+}
+
+static ssize_t timeslice_duration_min_show(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.timeslice_min);
+}
+
+static struct kobj_attribute timeslice_duration_min_attr =
+	__ATTR(timeslice_duration_min, 0644, timeslice_duration_min_show,
+	       timeslice_duration_min_store);
+
+static ssize_t timeslice_duration_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.timeslice_us);
+}
+
+static struct kobj_attribute timeslice_duration_attr =
+	__ATTR(timeslice_duration_us, 0644, timeslice_duration_show,
+	       timeslice_duration_store);
+
+static ssize_t timeslice_default(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.timeslice_us);
+}
+
+static struct kobj_attribute timeslice_duration_def =
+__ATTR(timeslice_duration_us, 0444, timeslice_default, NULL);
+
+static ssize_t timeslice_min_default(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.timeslice_min);
+}
+
+static struct kobj_attribute timeslice_duration_min_def =
+__ATTR(timeslice_duration_min, 0444, timeslice_min_default, NULL);
+
+static ssize_t timeslice_max_default(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.timeslice_max);
+}
+
+static struct kobj_attribute timeslice_duration_max_def =
+__ATTR(timeslice_duration_max, 0444, timeslice_max_default, NULL);
+
+static ssize_t preempt_timeout_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 min = eclass->sched_props.preempt_timeout_min;
+	u32 max = eclass->sched_props.preempt_timeout_max;
+	u32 timeout;
+	int err;
+
+	err = kstrtou32(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	if (!xe_hw_engine_timeout_in_range(timeout, min, max))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.preempt_timeout_us, timeout);
+
+	return count;
+}
+
+static ssize_t preempt_timeout_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_us);
+}
+
+static struct kobj_attribute preempt_timeout_attr =
+__ATTR(preempt_timeout_us, 0644, preempt_timeout_show, preempt_timeout_store);
+
+static ssize_t preempt_timeout_default(struct kobject *kobj,
+				       struct kobj_attribute *attr,
+				       char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_us);
+}
+
+static struct kobj_attribute preempt_timeout_def =
+__ATTR(preempt_timeout_us, 0444, preempt_timeout_default, NULL);
+
+static ssize_t preempt_timeout_min_default(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_min);
+}
+
+static struct kobj_attribute preempt_timeout_min_def =
+__ATTR(preempt_timeout_min, 0444, preempt_timeout_min_default, NULL);
+
+static ssize_t preempt_timeout_max_default(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
+
+	return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_max);
+}
+
+static struct kobj_attribute preempt_timeout_max_def =
+__ATTR(preempt_timeout_max, 0444, preempt_timeout_max_default, NULL);
+
+static ssize_t preempt_timeout_max_store(struct kobject *kobj,
+					 struct kobj_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 timeout;
+	int err;
+
+	err = kstrtou32(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	if (timeout < eclass->sched_props.preempt_timeout_min)
+		return -EINVAL;
+
+	if (!xe_hw_engine_timeout_in_range(timeout,
+					   XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN,
+					   XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.preempt_timeout_max, timeout);
+
+	return count;
+}
+
+static ssize_t preempt_timeout_max_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_max);
+}
+
+static struct kobj_attribute preempt_timeout_max_attr =
+	__ATTR(preempt_timeout_max, 0644, preempt_timeout_max_show,
+	       preempt_timeout_max_store);
+
+static ssize_t preempt_timeout_min_store(struct kobject *kobj,
+					 struct kobj_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+	u32 timeout;
+	int err;
+
+	err = kstrtou32(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	if (timeout > eclass->sched_props.preempt_timeout_max)
+		return -EINVAL;
+
+	if (!xe_hw_engine_timeout_in_range(timeout,
+					   XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN,
+					   XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX))
+		return -EINVAL;
+
+	WRITE_ONCE(eclass->sched_props.preempt_timeout_min, timeout);
+
+	return count;
+}
+
+static ssize_t preempt_timeout_min_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
+
+	return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_min);
+}
+
+static struct kobj_attribute preempt_timeout_min_attr =
+	__ATTR(preempt_timeout_min, 0644, preempt_timeout_min_show,
+	       preempt_timeout_min_store);
+
+static const struct attribute *defaults[] = {
+	&job_timeout_def.attr,
+	&job_timeout_min_def.attr,
+	&job_timeout_max_def.attr,
+	&timeslice_duration_def.attr,
+	&timeslice_duration_min_def.attr,
+	&timeslice_duration_max_def.attr,
+	&preempt_timeout_def.attr,
+	&preempt_timeout_min_def.attr,
+	&preempt_timeout_max_def.attr,
+	NULL
+};
+
+static const struct attribute *files[] = {
+	&job_timeout_attr.attr,
+	&job_timeout_min_attr.attr,
+	&job_timeout_max_attr.attr,
+	&timeslice_duration_attr.attr,
+	&timeslice_duration_min_attr.attr,
+	&timeslice_duration_max_attr.attr,
+	&preempt_timeout_attr.attr,
+	&preempt_timeout_min_attr.attr,
+	&preempt_timeout_max_attr.attr,
+	NULL
+};
+
+static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg;
+
+	sysfs_remove_files(kobj, files);
+	kobject_put(kobj);
+}
+
+	static struct kobj_eclass *
+kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name)
+{
+	struct kobj_eclass *keclass;
+	int err = 0;
+
+	keclass = kzalloc(sizeof(*keclass), GFP_KERNEL);
+	if (!keclass)
+		return NULL;
+
+	kobject_init(&keclass->base, &kobj_xe_hw_engine_type);
+	if (kobject_add(&keclass->base, parent, "%s", name)) {
+		kobject_put(&keclass->base);
+		return NULL;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini,
+				       &keclass->base);
+	if (err)
+		drm_warn(&xe->drm,
+			 "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+	return keclass;
+}
+
+static void hw_engine_class_defaults_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg;
+
+	sysfs_remove_files(kobj, defaults);
+	kobject_put(kobj);
+}
+
+static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
+					   struct kobject *parent)
+{
+	struct kobject *kobj;
+	int err = 0;
+
+	kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+	if (!kobj)
+		return -ENOMEM;
+
+	kobject_init(kobj, &kobj_xe_hw_engine_type);
+	err = kobject_add(kobj, parent, "%s", ".defaults");
+	if (err)
+		goto err_object;
+
+	err = sysfs_create_files(kobj, defaults);
+	if (err)
+		goto err_object;
+
+	err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini,
+				       kobj);
+	if (err)
+		drm_warn(&xe->drm,
+			 "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+	return err;
+err_object:
+	kobject_put(kobj);
+	return err;
+}
+
+static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static const struct kobj_type xe_hw_engine_sysfs_kobj_type = {
+	.release = xe_hw_engine_sysfs_kobj_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg;
+
+	kobject_put(kobj);
+}
+
+/**
+ * xe_hw_engine_class_sysfs_init - Init HW engine classes on GT.
+ * @gt: Xe GT.
+ *
+ * This routine creates sysfs for HW engine classes and adds methods
+ * to get/set different scheduling properties for HW engines class.
+ *
+ * Returns: Returns error value for failure and 0 for success.
+ */
+int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct kobject *kobj;
+	u16 class_mask = 0;
+	int err = 0;
+
+	kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+	if (!kobj)
+		return -ENOMEM;
+
+	kobject_init(kobj, &xe_hw_engine_sysfs_kobj_type);
+
+	err = kobject_add(kobj, gt->sysfs, "engines");
+	if (err)
+		goto err_object;
+
+	for_each_hw_engine(hwe, gt, id) {
+		char name[MAX_ENGINE_CLASS_NAME_LEN];
+		struct kobj_eclass *keclass;
+
+		if (hwe->class == XE_ENGINE_CLASS_OTHER ||
+		    hwe->class == XE_ENGINE_CLASS_MAX)
+			continue;
+
+		if ((class_mask >> hwe->class) & 1)
+			continue;
+
+		class_mask |= 1 << hwe->class;
+
+		switch (hwe->class) {
+		case XE_ENGINE_CLASS_RENDER:
+			strcpy(name, "rcs");
+			break;
+		case XE_ENGINE_CLASS_VIDEO_DECODE:
+			strcpy(name, "vcs");
+			break;
+		case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+			strcpy(name, "vecs");
+			break;
+		case XE_ENGINE_CLASS_COPY:
+			strcpy(name, "bcs");
+			break;
+		case XE_ENGINE_CLASS_COMPUTE:
+			strcpy(name, "ccs");
+			break;
+		default:
+			err = -EINVAL;
+			goto err_object;
+		}
+
+		keclass = kobj_xe_hw_engine_class(xe, kobj, name);
+		if (!keclass) {
+			err = -EINVAL;
+			goto err_object;
+		}
+
+		keclass->eclass = hwe->eclass;
+		err = xe_add_hw_engine_class_defaults(xe, &keclass->base);
+		if (err) {
+			drm_warn(&xe->drm,
+				 "Add .defaults to engines failed!, err: %d\n",
+				 err);
+			goto err_object;
+		}
+
+		err = sysfs_create_files(&keclass->base, files);
+		if (err)
+			goto err_object;
+	}
+
+	err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini,
+				       kobj);
+	if (err)
+		drm_warn(&xe->drm,
+			 "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+
+	return err;
+err_object:
+	kobject_put(kobj);
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
new file mode 100644
index 000000000000..ec5ba673b314
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_ENGINE_CLASS_SYSFS_H_
+#define _XE_ENGINE_CLASS_SYSFS_H_
+
+#include <linux/kobject.h>
+
+struct xe_gt;
+struct xe_hw_engine_class_intf;
+
+int xe_hw_engine_class_sysfs_init(struct xe_gt *gt);
+bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max);
+
+/**
+ * struct kobj_eclass - A eclass's kobject struct that connects the kobject and the
+ * eclass.
+ *
+ * When dealing with multiple eclass, this struct helps to understand which eclass
+ * needs to be addressed on a given sysfs call.
+ */
+struct kobj_eclass {
+	/** @base: The actual kobject */
+	struct kobject base;
+	/** @eclass: A pointer to the hw engine class interface */
+	struct xe_hw_engine_class_intf *eclass;
+};
+
+static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_eclass, base)->eclass;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
new file mode 100644
index 000000000000..39908dec042a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_TYPES_H_
+#define _XE_HW_ENGINE_TYPES_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_lrc_types.h"
+#include "xe_reg_sr_types.h"
+
+/* See "Engine ID Definition" struct in the Icelake PRM */
+enum xe_engine_class {
+	XE_ENGINE_CLASS_RENDER = 0,
+	XE_ENGINE_CLASS_VIDEO_DECODE = 1,
+	XE_ENGINE_CLASS_VIDEO_ENHANCE = 2,
+	XE_ENGINE_CLASS_COPY = 3,
+	XE_ENGINE_CLASS_OTHER = 4,
+	XE_ENGINE_CLASS_COMPUTE = 5,
+	XE_ENGINE_CLASS_MAX = 6,
+};
+
+enum xe_hw_engine_id {
+	XE_HW_ENGINE_RCS0,
+#define XE_HW_ENGINE_RCS_MASK	GENMASK_ULL(XE_HW_ENGINE_RCS0, XE_HW_ENGINE_RCS0)
+	XE_HW_ENGINE_BCS0,
+	XE_HW_ENGINE_BCS1,
+	XE_HW_ENGINE_BCS2,
+	XE_HW_ENGINE_BCS3,
+	XE_HW_ENGINE_BCS4,
+	XE_HW_ENGINE_BCS5,
+	XE_HW_ENGINE_BCS6,
+	XE_HW_ENGINE_BCS7,
+	XE_HW_ENGINE_BCS8,
+#define XE_HW_ENGINE_BCS_MASK	GENMASK_ULL(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS0)
+	XE_HW_ENGINE_VCS0,
+	XE_HW_ENGINE_VCS1,
+	XE_HW_ENGINE_VCS2,
+	XE_HW_ENGINE_VCS3,
+	XE_HW_ENGINE_VCS4,
+	XE_HW_ENGINE_VCS5,
+	XE_HW_ENGINE_VCS6,
+	XE_HW_ENGINE_VCS7,
+#define XE_HW_ENGINE_VCS_MASK	GENMASK_ULL(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0)
+	XE_HW_ENGINE_VECS0,
+	XE_HW_ENGINE_VECS1,
+	XE_HW_ENGINE_VECS2,
+	XE_HW_ENGINE_VECS3,
+#define XE_HW_ENGINE_VECS_MASK	GENMASK_ULL(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0)
+	XE_HW_ENGINE_CCS0,
+	XE_HW_ENGINE_CCS1,
+	XE_HW_ENGINE_CCS2,
+	XE_HW_ENGINE_CCS3,
+#define XE_HW_ENGINE_CCS_MASK	GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)
+	XE_HW_ENGINE_GSCCS0,
+#define XE_HW_ENGINE_GSCCS_MASK	GENMASK_ULL(XE_HW_ENGINE_GSCCS0, XE_HW_ENGINE_GSCCS0)
+	XE_NUM_HW_ENGINES,
+};
+
+/* FIXME: s/XE_HW_ENGINE_MAX_INSTANCE/XE_HW_ENGINE_MAX_COUNT */
+#define XE_HW_ENGINE_MAX_INSTANCE	9
+
+struct xe_bo;
+struct xe_execlist_port;
+struct xe_gt;
+
+/**
+ * struct xe_hw_engine_class_intf - per hw engine class struct interface
+ *
+ * Contains all the hw engine properties per engine class.
+ *
+ * @sched_props: scheduling properties
+ * @defaults: default scheduling properties
+ */
+struct xe_hw_engine_class_intf {
+	/**
+	 * @sched_props: scheduling properties
+	 * @defaults: default scheduling properties
+	 */
+	struct {
+		/** @set_job_timeout: Set job timeout in ms for engine */
+		u32 job_timeout_ms;
+		/** @job_timeout_min: Min job timeout in ms for engine */
+		u32 job_timeout_min;
+		/** @job_timeout_max: Max job timeout in ms for engine */
+		u32 job_timeout_max;
+		/** @timeslice_us: timeslice period in micro-seconds */
+		u32 timeslice_us;
+		/** @timeslice_min: min timeslice period in micro-seconds */
+		u32 timeslice_min;
+		/** @timeslice_max: max timeslice period in micro-seconds */
+		u32 timeslice_max;
+		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		u32 preempt_timeout_us;
+		/** @preempt_timeout_min: min preemption timeout in micro-seconds */
+		u32 preempt_timeout_min;
+		/** @preempt_timeout_max: max preemption timeout in micro-seconds */
+		u32 preempt_timeout_max;
+	} sched_props, defaults;
+};
+
+/**
+ * struct xe_hw_engine - Hardware engine
+ *
+ * Contains all the hardware engine state for physical instances.
+ */
+struct xe_hw_engine {
+	/** @gt: graphics tile this hw engine belongs to */
+	struct xe_gt *gt;
+	/** @name: name of this hw engine */
+	const char *name;
+	/** @class: class of this hw engine */
+	enum xe_engine_class class;
+	/** @instance: physical instance of this hw engine */
+	u16 instance;
+	/** @logical_instance: logical instance of this hw engine */
+	u16 logical_instance;
+	/** @mmio_base: MMIO base address of this hw engine*/
+	u32 mmio_base;
+	/**
+	 * @reg_sr: table with registers to be restored on GT init/resume/reset
+	 */
+	struct xe_reg_sr reg_sr;
+	/**
+	 * @reg_whitelist: table with registers to be whitelisted
+	 */
+	struct xe_reg_sr reg_whitelist;
+	/**
+	 * @reg_lrc: LRC workaround registers
+	 */
+	struct xe_reg_sr reg_lrc;
+	/** @domain: force wake domain of this hw engine */
+	enum xe_force_wake_domains domain;
+	/** @hwsp: hardware status page buffer object */
+	struct xe_bo *hwsp;
+	/** @kernel_lrc: Kernel LRC (should be replaced /w an xe_engine) */
+	struct xe_lrc kernel_lrc;
+	/** @exl_port: execlists port */
+	struct xe_execlist_port *exl_port;
+	/** @fence_irq: fence IRQ to run when a hw engine IRQ is received */
+	struct xe_hw_fence_irq *fence_irq;
+	/** @irq_handler: IRQ handler to run when hw engine IRQ is received */
+	void (*irq_handler)(struct xe_hw_engine *hwe, u16 intr_vec);
+	/** @engine_id: id  for this hw engine */
+	enum xe_hw_engine_id engine_id;
+	/** @eclass: pointer to per hw engine class interface */
+	struct xe_hw_engine_class_intf *eclass;
+};
+
+/**
+ * struct xe_hw_engine_snapshot - Hardware engine snapshot
+ *
+ * Contains the snapshot of useful hardware engine info and registers.
+ */
+struct xe_hw_engine_snapshot {
+	/** @name: name of the hw engine */
+	char *name;
+	/** @class: class of this hw engine */
+	enum xe_engine_class class;
+	/** @logical_instance: logical instance of this hw engine */
+	u16 logical_instance;
+	/** @forcewake: Force Wake information snapshot */
+	struct {
+		/** @domain: force wake domain of this hw engine */
+		enum xe_force_wake_domains domain;
+		/** @ref: Forcewake ref for the above domain */
+		int ref;
+	} forcewake;
+	/** @mmio_base: MMIO base address of this hw engine*/
+	u32 mmio_base;
+	/** @reg: Useful MMIO register snapshot */
+	struct {
+		/** @ring_hwstam: RING_HWSTAM */
+		u32 ring_hwstam;
+		/** @ring_hws_pga: RING_HWS_PGA */
+		u32 ring_hws_pga;
+		/** @ring_execlist_status_lo: RING_EXECLIST_STATUS_LO */
+		u32 ring_execlist_status_lo;
+		/** @ring_execlist_status_hi: RING_EXECLIST_STATUS_HI */
+		u32 ring_execlist_status_hi;
+		/** @ring_execlist_sq_contents_lo: RING_EXECLIST_SQ_CONTENTS */
+		u32 ring_execlist_sq_contents_lo;
+		/** @ring_execlist_sq_contents_hi: RING_EXECLIST_SQ_CONTENTS + 4 */
+		u32 ring_execlist_sq_contents_hi;
+		/** @ring_start: RING_START */
+		u32 ring_start;
+		/** @ring_head: RING_HEAD */
+		u32 ring_head;
+		/** @ring_tail: RING_TAIL */
+		u32 ring_tail;
+		/** @ring_ctl: RING_CTL */
+		u32 ring_ctl;
+		/** @ring_mi_mode: RING_MI_MODE */
+		u32 ring_mi_mode;
+		/** @ring_mode: RING_MODE */
+		u32 ring_mode;
+		/** @ring_imr: RING_IMR */
+		u32 ring_imr;
+		/** @ring_esr: RING_ESR */
+		u32 ring_esr;
+		/** @ring_emr: RING_EMR */
+		u32 ring_emr;
+		/** @ring_eir: RING_EIR */
+		u32 ring_eir;
+		/** @ring_acthd_udw: RING_ACTHD_UDW */
+		u32 ring_acthd_udw;
+		/** @ring_acthd: RING_ACTHD */
+		u32 ring_acthd;
+		/** @ring_bbaddr_udw: RING_BBADDR_UDW */
+		u32 ring_bbaddr_udw;
+		/** @ring_bbaddr: RING_BBADDR */
+		u32 ring_bbaddr;
+		/** @ring_dma_fadd_udw: RING_DMA_FADD_UDW */
+		u32 ring_dma_fadd_udw;
+		/** @ring_dma_fadd: RING_DMA_FADD */
+		u32 ring_dma_fadd;
+		/** @ipehr: IPEHR */
+		u32 ipehr;
+		/** @rcu_mode: RCU_MODE */
+		u32 rcu_mode;
+	} reg;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
new file mode 100644
index 000000000000..a6094c81f2ad
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_hw_fence.h"
+
+#include <linux/device.h>
+#include <linux/slab.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_trace.h"
+
+static struct kmem_cache *xe_hw_fence_slab;
+
+int __init xe_hw_fence_module_init(void)
+{
+	xe_hw_fence_slab = kmem_cache_create("xe_hw_fence",
+					     sizeof(struct xe_hw_fence), 0,
+					     SLAB_HWCACHE_ALIGN, NULL);
+	if (!xe_hw_fence_slab)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void xe_hw_fence_module_exit(void)
+{
+	rcu_barrier();
+	kmem_cache_destroy(xe_hw_fence_slab);
+}
+
+static struct xe_hw_fence *fence_alloc(void)
+{
+	return kmem_cache_zalloc(xe_hw_fence_slab, GFP_KERNEL);
+}
+
+static void fence_free(struct rcu_head *rcu)
+{
+	struct xe_hw_fence *fence =
+		container_of(rcu, struct xe_hw_fence, dma.rcu);
+
+	if (!WARN_ON_ONCE(!fence))
+		kmem_cache_free(xe_hw_fence_slab, fence);
+}
+
+static void hw_fence_irq_run_cb(struct irq_work *work)
+{
+	struct xe_hw_fence_irq *irq = container_of(work, typeof(*irq), work);
+	struct xe_hw_fence *fence, *next;
+	bool tmp;
+
+	tmp = dma_fence_begin_signalling();
+	spin_lock(&irq->lock);
+	if (irq->enabled) {
+		list_for_each_entry_safe(fence, next, &irq->pending, irq_link) {
+			struct dma_fence *dma_fence = &fence->dma;
+
+			trace_xe_hw_fence_try_signal(fence);
+			if (dma_fence_is_signaled_locked(dma_fence)) {
+				trace_xe_hw_fence_signal(fence);
+				list_del_init(&fence->irq_link);
+				dma_fence_put(dma_fence);
+			}
+		}
+	}
+	spin_unlock(&irq->lock);
+	dma_fence_end_signalling(tmp);
+}
+
+void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq)
+{
+	spin_lock_init(&irq->lock);
+	init_irq_work(&irq->work, hw_fence_irq_run_cb);
+	INIT_LIST_HEAD(&irq->pending);
+	irq->enabled = true;
+}
+
+void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq)
+{
+	struct xe_hw_fence *fence, *next;
+	unsigned long flags;
+	int err;
+	bool tmp;
+
+	if (XE_WARN_ON(!list_empty(&irq->pending))) {
+		tmp = dma_fence_begin_signalling();
+		spin_lock_irqsave(&irq->lock, flags);
+		list_for_each_entry_safe(fence, next, &irq->pending, irq_link) {
+			list_del_init(&fence->irq_link);
+			err = dma_fence_signal_locked(&fence->dma);
+			dma_fence_put(&fence->dma);
+			XE_WARN_ON(err);
+		}
+		spin_unlock_irqrestore(&irq->lock, flags);
+		dma_fence_end_signalling(tmp);
+	}
+}
+
+void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq)
+{
+	irq_work_queue(&irq->work);
+}
+
+void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq)
+{
+	spin_lock_irq(&irq->lock);
+	irq->enabled = false;
+	spin_unlock_irq(&irq->lock);
+}
+
+void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq)
+{
+	spin_lock_irq(&irq->lock);
+	irq->enabled = true;
+	spin_unlock_irq(&irq->lock);
+
+	irq_work_queue(&irq->work);
+}
+
+void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
+			  struct xe_hw_fence_irq *irq, const char *name)
+{
+	ctx->gt = gt;
+	ctx->irq = irq;
+	ctx->dma_fence_ctx = dma_fence_context_alloc(1);
+	ctx->next_seqno = XE_FENCE_INITIAL_SEQNO;
+	sprintf(ctx->name, "%s", name);
+}
+
+void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx)
+{
+}
+
+static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence);
+
+static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence)
+{
+	return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock);
+}
+
+static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+
+	return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev);
+}
+
+static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+
+	return fence->ctx->name;
+}
+
+static bool xe_hw_fence_signaled(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+	struct xe_device *xe = gt_to_xe(fence->ctx->gt);
+	u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32);
+
+	return dma_fence->error ||
+		!__dma_fence_is_later(dma_fence->seqno, seqno, dma_fence->ops);
+}
+
+static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+	struct xe_hw_fence_irq *irq = xe_hw_fence_irq(fence);
+
+	dma_fence_get(dma_fence);
+	list_add_tail(&fence->irq_link, &irq->pending);
+
+	/* SW completed (no HW IRQ) so kick handler to signal fence */
+	if (xe_hw_fence_signaled(dma_fence))
+		xe_hw_fence_irq_run(irq);
+
+	return true;
+}
+
+static void xe_hw_fence_release(struct dma_fence *dma_fence)
+{
+	struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+
+	trace_xe_hw_fence_free(fence);
+	XE_WARN_ON(!list_empty(&fence->irq_link));
+	call_rcu(&dma_fence->rcu, fence_free);
+}
+
+static const struct dma_fence_ops xe_hw_fence_ops = {
+	.get_driver_name = xe_hw_fence_get_driver_name,
+	.get_timeline_name = xe_hw_fence_get_timeline_name,
+	.enable_signaling = xe_hw_fence_enable_signaling,
+	.signaled = xe_hw_fence_signaled,
+	.release = xe_hw_fence_release,
+};
+
+static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence)
+{
+	if (XE_WARN_ON(fence->ops != &xe_hw_fence_ops))
+		return NULL;
+
+	return container_of(fence, struct xe_hw_fence, dma);
+}
+
+struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
+				       struct iosys_map seqno_map)
+{
+	struct xe_hw_fence *fence;
+
+	fence = fence_alloc();
+	if (!fence)
+		return ERR_PTR(-ENOMEM);
+
+	dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock,
+		       ctx->dma_fence_ctx, ctx->next_seqno++);
+
+	fence->ctx = ctx;
+	fence->seqno_map = seqno_map;
+	INIT_LIST_HEAD(&fence->irq_link);
+
+	trace_xe_hw_fence_create(fence);
+
+	return fence;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h
new file mode 100644
index 000000000000..cfe5fd603787
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_HW_FENCE_H_
+#define _XE_HW_FENCE_H_
+
+#include "xe_hw_fence_types.h"
+
+/* Cause an early wrap to catch wrapping errors */
+#define XE_FENCE_INITIAL_SEQNO (-127)
+
+int xe_hw_fence_module_init(void);
+void xe_hw_fence_module_exit(void);
+
+void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq);
+
+void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
+			  struct xe_hw_fence_irq *irq, const char *name);
+void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx);
+
+struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
+				       struct iosys_map seqno_map);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h
new file mode 100644
index 000000000000..b33c4956e8ea
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HW_FENCE_TYPES_H_
+#define _XE_HW_FENCE_TYPES_H_
+
+#include <linux/dma-fence.h>
+#include <linux/iosys-map.h>
+#include <linux/irq_work.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct xe_gt;
+
+/**
+ * struct xe_hw_fence_irq - hardware fence IRQ handler
+ *
+ * One per engine class, signals completed xe_hw_fences, triggered via hw engine
+ * interrupt. On each trigger, search list of pending fences and signal.
+ */
+struct xe_hw_fence_irq {
+	/** @lock: protects all xe_hw_fences + pending list */
+	spinlock_t lock;
+	/** @work: IRQ worker run to signal the fences */
+	struct irq_work work;
+	/** @pending: list of pending xe_hw_fences */
+	struct list_head pending;
+	/** @enabled: fence signaling enabled */
+	bool enabled;
+};
+
+#define MAX_FENCE_NAME_LEN	16
+
+/**
+ * struct xe_hw_fence_ctx - hardware fence context
+ *
+ * The context for a hardware fence. 1 to 1 relationship with xe_engine. Points
+ * to a xe_hw_fence_irq, maintains serial seqno.
+ */
+struct xe_hw_fence_ctx {
+	/** @gt: graphics tile of hardware fence context */
+	struct xe_gt *gt;
+	/** @irq: fence irq handler */
+	struct xe_hw_fence_irq *irq;
+	/** @dma_fence_ctx: dma fence context for hardware fence */
+	u64 dma_fence_ctx;
+	/** @next_seqno: next seqno for hardware fence */
+	u32 next_seqno;
+	/** @name: name of hardware fence context */
+	char name[MAX_FENCE_NAME_LEN];
+};
+
+/**
+ * struct xe_hw_fence - hardware fence
+ *
+ * Used to indicate a xe_sched_job is complete via a seqno written to memory.
+ * Signals on error or seqno past.
+ */
+struct xe_hw_fence {
+	/** @dma: base dma fence for hardware fence context */
+	struct dma_fence dma;
+	/** @ctx: hardware fence context */
+	struct xe_hw_fence_ctx *ctx;
+	/** @seqno_map: I/O map for seqno */
+	struct iosys_map seqno_map;
+	/** @irq_link: Link in struct xe_hw_fence_irq.pending */
+	struct list_head irq_link;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
new file mode 100644
index 000000000000..6ef2aa1eae8b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -0,0 +1,776 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/hwmon-sysfs.h>
+#include <linux/hwmon.h>
+#include <linux/types.h>
+
+#include <drm/drm_managed.h>
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_mchbar_regs.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_hwmon.h"
+#include "xe_mmio.h"
+#include "xe_pcode.h"
+#include "xe_pcode_api.h"
+
+enum xe_hwmon_reg {
+	REG_PKG_RAPL_LIMIT,
+	REG_PKG_POWER_SKU,
+	REG_PKG_POWER_SKU_UNIT,
+	REG_GT_PERF_STATUS,
+	REG_PKG_ENERGY_STATUS,
+};
+
+enum xe_hwmon_reg_operation {
+	REG_READ32,
+	REG_RMW32,
+	REG_READ64,
+};
+
+/*
+ * SF_* - scale factors for particular quantities according to hwmon spec.
+ */
+#define SF_POWER	1000000		/* microwatts */
+#define SF_CURR		1000		/* milliamperes */
+#define SF_VOLTAGE	1000		/* millivolts */
+#define SF_ENERGY	1000000		/* microjoules */
+#define SF_TIME		1000		/* milliseconds */
+
+/**
+ * struct xe_hwmon_energy_info - to accumulate energy
+ */
+struct xe_hwmon_energy_info {
+	/** @reg_val_prev: previous energy reg val */
+	u32 reg_val_prev;
+	/** @accum_energy: accumulated energy */
+	long accum_energy;
+};
+
+/**
+ * struct xe_hwmon - xe hwmon data structure
+ */
+struct xe_hwmon {
+	/** @hwmon_dev: hwmon device for xe */
+	struct device *hwmon_dev;
+	/** @gt: primary gt */
+	struct xe_gt *gt;
+	/** @hwmon_lock: lock for rw attributes*/
+	struct mutex hwmon_lock;
+	/** @scl_shift_power: pkg power unit */
+	int scl_shift_power;
+	/** @scl_shift_energy: pkg energy unit */
+	int scl_shift_energy;
+	/** @scl_shift_time: pkg time unit */
+	int scl_shift_time;
+	/** @ei: Energy info for energy1_input */
+	struct xe_hwmon_energy_info ei;
+};
+
+static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg)
+{
+	struct xe_device *xe = gt_to_xe(hwmon->gt);
+	struct xe_reg reg = XE_REG(0);
+
+	switch (hwmon_reg) {
+	case REG_PKG_RAPL_LIMIT:
+		if (xe->info.platform == XE_DG2)
+			reg = PCU_CR_PACKAGE_RAPL_LIMIT;
+		else if (xe->info.platform == XE_PVC)
+			reg = PVC_GT0_PACKAGE_RAPL_LIMIT;
+		break;
+	case REG_PKG_POWER_SKU:
+		if (xe->info.platform == XE_DG2)
+			reg = PCU_CR_PACKAGE_POWER_SKU;
+		else if (xe->info.platform == XE_PVC)
+			reg = PVC_GT0_PACKAGE_POWER_SKU;
+		break;
+	case REG_PKG_POWER_SKU_UNIT:
+		if (xe->info.platform == XE_DG2)
+			reg = PCU_CR_PACKAGE_POWER_SKU_UNIT;
+		else if (xe->info.platform == XE_PVC)
+			reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT;
+		break;
+	case REG_GT_PERF_STATUS:
+		if (xe->info.platform == XE_DG2)
+			reg = GT_PERF_STATUS;
+		break;
+	case REG_PKG_ENERGY_STATUS:
+		if (xe->info.platform == XE_DG2)
+			reg = PCU_CR_PACKAGE_ENERGY_STATUS;
+		else if (xe->info.platform == XE_PVC)
+			reg = PVC_GT0_PLATFORM_ENERGY_STATUS;
+		break;
+	default:
+		drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg);
+		break;
+	}
+
+	return reg.raw;
+}
+
+static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
+				 enum xe_hwmon_reg_operation operation, u64 *value,
+				 u32 clr, u32 set)
+{
+	struct xe_reg reg;
+
+	reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg);
+
+	if (!reg.raw)
+		return;
+
+	switch (operation) {
+	case REG_READ32:
+		*value = xe_mmio_read32(hwmon->gt, reg);
+		break;
+	case REG_RMW32:
+		*value = xe_mmio_rmw32(hwmon->gt, reg, clr, set);
+		break;
+	case REG_READ64:
+		*value = xe_mmio_read64_2x32(hwmon->gt, reg);
+		break;
+	default:
+		drm_warn(&gt_to_xe(hwmon->gt)->drm, "Invalid xe hwmon reg operation: %d\n",
+			 operation);
+		break;
+	}
+}
+
+#define PL1_DISABLE 0
+
+/*
+ * HW allows arbitrary PL1 limits to be set but silently clamps these values to
+ * "typical but not guaranteed" min/max values in REG_PKG_POWER_SKU. Follow the
+ * same pattern for sysfs, allow arbitrary PL1 limits to be set but display
+ * clamped values when read.
+ */
+static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value)
+{
+	u64 reg_val, min, max;
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val, 0, 0);
+	/* Check if PL1 limit is disabled */
+	if (!(reg_val & PKG_PWR_LIM_1_EN)) {
+		*value = PL1_DISABLE;
+		goto unlock;
+	}
+
+	reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val);
+	*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, &reg_val, 0, 0);
+	min = REG_FIELD_GET(PKG_MIN_PWR, reg_val);
+	min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
+	max = REG_FIELD_GET(PKG_MAX_PWR, reg_val);
+	max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
+
+	if (min && max)
+		*value = clamp_t(u64, *value, min, max);
+unlock:
+	mutex_unlock(&hwmon->hwmon_lock);
+}
+
+static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
+{
+	int ret = 0;
+	u64 reg_val;
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	/* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */
+	if (value == PL1_DISABLE) {
+		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val,
+				     PKG_PWR_LIM_1_EN, 0);
+		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val,
+				     PKG_PWR_LIM_1_EN, 0);
+
+		if (reg_val & PKG_PWR_LIM_1_EN) {
+			ret = -EOPNOTSUPP;
+			goto unlock;
+		}
+	}
+
+	/* Computation in 64-bits to avoid overflow. Round to nearest. */
+	reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER);
+	reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val);
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val,
+			     PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val);
+unlock:
+	mutex_unlock(&hwmon->hwmon_lock);
+	return ret;
+}
+
+static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value)
+{
+	u64 reg_val;
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, &reg_val, 0, 0);
+	reg_val = REG_FIELD_GET(PKG_TDP, reg_val);
+	*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
+}
+
+/*
+ * xe_hwmon_energy_get - Obtain energy value
+ *
+ * The underlying energy hardware register is 32-bits and is subject to
+ * overflow. How long before overflow? For example, with an example
+ * scaling bit shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and
+ * a power draw of 1000 watts, the 32-bit counter will overflow in
+ * approximately 4.36 minutes.
+ *
+ * Examples:
+ *    1 watt:  (2^32 >> 14) /    1 W / (60 * 60 * 24) secs/day -> 3 days
+ * 1000 watts: (2^32 >> 14) / 1000 W / 60             secs/min -> 4.36 minutes
+ *
+ * The function significantly increases overflow duration (from 4.36
+ * minutes) by accumulating the energy register into a 'long' as allowed by
+ * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()),
+ * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and
+ * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before
+ * energy1_input overflows. This at 1000 W is an overflow duration of 278 years.
+ */
+static void
+xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy)
+{
+	struct xe_hwmon_energy_info *ei = &hwmon->ei;
+	u64 reg_val;
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ32,
+			     &reg_val, 0, 0);
+
+	if (reg_val >= ei->reg_val_prev)
+		ei->accum_energy += reg_val - ei->reg_val_prev;
+	else
+		ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
+
+	ei->reg_val_prev = reg_val;
+
+	*energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
+				  hwmon->scl_shift_energy);
+}
+
+static ssize_t
+xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
+				  char *buf)
+{
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	u32 x, y, x_w = 2; /* 2 bits */
+	u64 r, tau4, out;
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT,
+			     REG_READ32, &r, 0, 0);
+
+	mutex_unlock(&hwmon->hwmon_lock);
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r);
+	y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r);
+
+	/*
+	 * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17)
+	 *     = (4 | x) << (y - 2)
+	 *
+	 * Here (y - 2) ensures a 1.x fixed point representation of 1.x
+	 * As x is 2 bits so 1.x can be 1.0, 1.25, 1.50, 1.75
+	 *
+	 * As y can be < 2, we compute tau4 = (4 | x) << y
+	 * and then add 2 when doing the final right shift to account for units
+	 */
+	tau4 = ((1 << x_w) | x) << y;
+
+	/* val in hwmon interface units (millisec) */
+	out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
+
+	return sysfs_emit(buf, "%llu\n", out);
+}
+
+static ssize_t
+xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	u32 x, y, rxy, x_w = 2; /* 2 bits */
+	u64 tau4, r, max_win;
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	/*
+	 * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12.
+	 * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds.
+	 *
+	 * The ideal scenario is for PKG_MAX_WIN to be read from the PKG_PWR_SKU register.
+	 * However, it is observed that existing discrete GPUs does not provide correct
+	 * PKG_MAX_WIN value, therefore a using default constant value. For future discrete GPUs
+	 * this may get resolved, in which case PKG_MAX_WIN should be obtained from PKG_PWR_SKU.
+	 */
+#define PKG_MAX_WIN_DEFAULT 0x12ull
+
+	/*
+	 * val must be < max in hwmon interface units. The steps below are
+	 * explained in xe_hwmon_power1_max_interval_show()
+	 */
+	r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
+	x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
+	y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
+	tau4 = ((1 << x_w) | x) << y;
+	max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
+
+	if (val > max_win)
+		return -EINVAL;
+
+	/* val in hw units */
+	val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME);
+
+	/*
+	 * Convert val to 1.x * power(2,y)
+	 * y = ilog2(val)
+	 * x = (val - (1 << y)) >> (y - 2)
+	 */
+	if (!val) {
+		y = 0;
+		x = 0;
+	} else {
+		y = ilog2(val);
+		x = (val - (1ul << y)) << x_w >> y;
+	}
+
+	rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, (u64 *)&r,
+			     PKG_PWR_LIM_1_TIME, rxy);
+
+	mutex_unlock(&hwmon->hwmon_lock);
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	return count;
+}
+
+static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
+			  xe_hwmon_power1_max_interval_show,
+			  xe_hwmon_power1_max_interval_store, 0);
+
+static struct attribute *hwmon_attributes[] = {
+	&sensor_dev_attr_power1_max_interval.dev_attr.attr,
+	NULL
+};
+
+static umode_t xe_hwmon_attributes_visible(struct kobject *kobj,
+					   struct attribute *attr, int index)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	int ret = 0;
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
+		ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? attr->mode : 0;
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	return ret;
+}
+
+static const struct attribute_group hwmon_attrgroup = {
+	.attrs = hwmon_attributes,
+	.is_visible = xe_hwmon_attributes_visible,
+};
+
+static const struct attribute_group *hwmon_groups[] = {
+	&hwmon_attrgroup,
+	NULL
+};
+
+static const struct hwmon_channel_info *hwmon_info[] = {
+	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT),
+	HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT),
+	HWMON_CHANNEL_INFO(in, HWMON_I_INPUT),
+	HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT),
+	NULL
+};
+
+/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */
+static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval)
+{
+	/* Avoid Illegal Subcommand error */
+	if (gt_to_xe(gt)->info.platform == XE_DG2)
+		return -ENXIO;
+
+	return xe_pcode_read(gt, PCODE_MBOX(PCODE_POWER_SETUP,
+			     POWER_SETUP_SUBCOMMAND_READ_I1, 0),
+			     uval, 0);
+}
+
+static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval)
+{
+	return xe_pcode_write(gt, PCODE_MBOX(PCODE_POWER_SETUP,
+			      POWER_SETUP_SUBCOMMAND_WRITE_I1, 0),
+			      uval);
+}
+
+static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, long *value, u32 scale_factor)
+{
+	int ret;
+	u32 uval;
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval);
+	if (ret)
+		goto unlock;
+
+	*value = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
+				 scale_factor, POWER_SETUP_I1_SHIFT);
+unlock:
+	mutex_unlock(&hwmon->hwmon_lock);
+	return ret;
+}
+
+static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u32 scale_factor)
+{
+	int ret;
+	u32 uval;
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor);
+	ret = xe_hwmon_pcode_write_i1(hwmon->gt, uval);
+
+	mutex_unlock(&hwmon->hwmon_lock);
+	return ret;
+}
+
+static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, long *value)
+{
+	u64 reg_val;
+
+	xe_hwmon_process_reg(hwmon, REG_GT_PERF_STATUS,
+			     REG_READ32, &reg_val, 0, 0);
+	/* HW register value in units of 2.5 millivolt */
+	*value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE);
+}
+
+static umode_t
+xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan)
+{
+	u32 uval;
+
+	switch (attr) {
+	case hwmon_power_max:
+		return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? 0664 : 0;
+	case hwmon_power_rated_max:
+		return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU) ? 0444 : 0;
+	case hwmon_power_crit:
+		return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
+			!(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+	default:
+		return 0;
+	}
+}
+
+static int
+xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val)
+{
+	switch (attr) {
+	case hwmon_power_max:
+		xe_hwmon_power_max_read(hwmon, val);
+		return 0;
+	case hwmon_power_rated_max:
+		xe_hwmon_power_rated_max_read(hwmon, val);
+		return 0;
+	case hwmon_power_crit:
+		return xe_hwmon_power_curr_crit_read(hwmon, val, SF_POWER);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int chan, long val)
+{
+	switch (attr) {
+	case hwmon_power_max:
+		return xe_hwmon_power_max_write(hwmon, val);
+	case hwmon_power_crit:
+		return xe_hwmon_power_curr_crit_write(hwmon, val, SF_POWER);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static umode_t
+xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr)
+{
+	u32 uval;
+
+	switch (attr) {
+	case hwmon_curr_crit:
+		return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
+			(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+	default:
+		return 0;
+	}
+}
+
+static int
+xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+{
+	switch (attr) {
+	case hwmon_curr_crit:
+		return xe_hwmon_power_curr_crit_read(hwmon, val, SF_CURR);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, long val)
+{
+	switch (attr) {
+	case hwmon_curr_crit:
+		return xe_hwmon_power_curr_crit_write(hwmon, val, SF_CURR);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static umode_t
+xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr)
+{
+	switch (attr) {
+	case hwmon_in_input:
+		return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS) ? 0444 : 0;
+	default:
+		return 0;
+	}
+}
+
+static int
+xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+{
+	switch (attr) {
+	case hwmon_in_input:
+		xe_hwmon_get_voltage(hwmon, val);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static umode_t
+xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr)
+{
+	switch (attr) {
+	case hwmon_energy_input:
+		return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS) ? 0444 : 0;
+	default:
+		return 0;
+	}
+}
+
+static int
+xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+{
+	switch (attr) {
+	case hwmon_energy_input:
+		xe_hwmon_energy_get(hwmon, val);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static umode_t
+xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
+		    u32 attr, int channel)
+{
+	struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata;
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	switch (type) {
+	case hwmon_power:
+		ret = xe_hwmon_power_is_visible(hwmon, attr, channel);
+		break;
+	case hwmon_curr:
+		ret = xe_hwmon_curr_is_visible(hwmon, attr);
+		break;
+	case hwmon_in:
+		ret = xe_hwmon_in_is_visible(hwmon, attr);
+		break;
+	case hwmon_energy:
+		ret = xe_hwmon_energy_is_visible(hwmon, attr);
+		break;
+	default:
+		ret = 0;
+		break;
+	}
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	return ret;
+}
+
+static int
+xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+	      int channel, long *val)
+{
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	switch (type) {
+	case hwmon_power:
+		ret = xe_hwmon_power_read(hwmon, attr, channel, val);
+		break;
+	case hwmon_curr:
+		ret = xe_hwmon_curr_read(hwmon, attr, val);
+		break;
+	case hwmon_in:
+		ret = xe_hwmon_in_read(hwmon, attr, val);
+		break;
+	case hwmon_energy:
+		ret = xe_hwmon_energy_read(hwmon, attr, val);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	return ret;
+}
+
+static int
+xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+	       int channel, long val)
+{
+	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
+	int ret;
+
+	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+
+	switch (type) {
+	case hwmon_power:
+		ret = xe_hwmon_power_write(hwmon, attr, channel, val);
+		break;
+	case hwmon_curr:
+		ret = xe_hwmon_curr_write(hwmon, attr, val);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+
+	return ret;
+}
+
+static const struct hwmon_ops hwmon_ops = {
+	.is_visible = xe_hwmon_is_visible,
+	.read = xe_hwmon_read,
+	.write = xe_hwmon_write,
+};
+
+static const struct hwmon_chip_info hwmon_chip_info = {
+	.ops = &hwmon_ops,
+	.info = hwmon_info,
+};
+
+static void
+xe_hwmon_get_preregistration_info(struct xe_device *xe)
+{
+	struct xe_hwmon *hwmon = xe->hwmon;
+	long energy;
+	u64 val_sku_unit = 0;
+
+	/*
+	 * The contents of register PKG_POWER_SKU_UNIT do not change,
+	 * so read it once and store the shift values.
+	 */
+	if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT)) {
+		xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT,
+				     REG_READ32, &val_sku_unit, 0, 0);
+		hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
+		hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
+		hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit);
+	}
+
+	/*
+	 * Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the
+	 * first value of the energy register read
+	 */
+	if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, 0))
+		xe_hwmon_energy_get(hwmon, &energy);
+}
+
+static void xe_hwmon_mutex_destroy(void *arg)
+{
+	struct xe_hwmon *hwmon = arg;
+
+	mutex_destroy(&hwmon->hwmon_lock);
+}
+
+void xe_hwmon_register(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+	struct xe_hwmon *hwmon;
+
+	/* hwmon is available only for dGfx */
+	if (!IS_DGFX(xe))
+		return;
+
+	hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+	if (!hwmon)
+		return;
+
+	xe->hwmon = hwmon;
+
+	mutex_init(&hwmon->hwmon_lock);
+	if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon))
+		return;
+
+	/* primary GT to access device level properties */
+	hwmon->gt = xe->tiles[0].primary_gt;
+
+	xe_hwmon_get_preregistration_info(xe);
+
+	drm_dbg(&xe->drm, "Register xe hwmon interface\n");
+
+	/*  hwmon_dev points to device hwmon<i> */
+	hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon,
+								&hwmon_chip_info,
+								hwmon_groups);
+
+	if (IS_ERR(hwmon->hwmon_dev)) {
+		drm_warn(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev);
+		xe->hwmon = NULL;
+		return;
+	}
+}
+
diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h
new file mode 100644
index 000000000000..c42a1de2cd7a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hwmon.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_HWMON_H_
+#define _XE_HWMON_H_
+
+#include <linux/types.h>
+
+struct xe_device;
+
+#if IS_REACHABLE(CONFIG_HWMON)
+void xe_hwmon_register(struct xe_device *xe);
+#else
+static inline void xe_hwmon_register(struct xe_device *xe) { };
+#endif
+
+#endif /* _XE_HWMON_H_ */
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
new file mode 100644
index 000000000000..d1f5ba4bb745
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -0,0 +1,666 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_irq.h"
+
+#include <linux/sched/clock.h>
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_device.h"
+#include "xe_display.h"
+#include "xe_drv.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_hw_engine.h"
+#include "xe_mmio.h"
+
+/*
+ * Interrupt registers for a unit are always consecutive and ordered
+ * ISR, IMR, IIR, IER.
+ */
+#define IMR(offset)				XE_REG(offset + 0x4)
+#define IIR(offset)				XE_REG(offset + 0x8)
+#define IER(offset)				XE_REG(offset + 0xc)
+
+static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg)
+{
+	u32 val = xe_mmio_read32(mmio, reg);
+
+	if (val == 0)
+		return;
+
+	drm_WARN(&gt_to_xe(mmio)->drm, 1,
+		 "Interrupt register 0x%x is not zero: 0x%08x\n",
+		 reg.addr, val);
+	xe_mmio_write32(mmio, reg, 0xffffffff);
+	xe_mmio_read32(mmio, reg);
+	xe_mmio_write32(mmio, reg, 0xffffffff);
+	xe_mmio_read32(mmio, reg);
+}
+
+/*
+ * Unmask and enable the specified interrupts.  Does not check current state,
+ * so any bits not specified here will become masked and disabled.
+ */
+static void unmask_and_enable(struct xe_tile *tile, u32 irqregs, u32 bits)
+{
+	struct xe_gt *mmio = tile->primary_gt;
+
+	/*
+	 * If we're just enabling an interrupt now, it shouldn't already
+	 * be raised in the IIR.
+	 */
+	assert_iir_is_zero(mmio, IIR(irqregs));
+
+	xe_mmio_write32(mmio, IER(irqregs), bits);
+	xe_mmio_write32(mmio, IMR(irqregs), ~bits);
+
+	/* Posting read */
+	xe_mmio_read32(mmio, IMR(irqregs));
+}
+
+/* Mask and disable all interrupts. */
+static void mask_and_disable(struct xe_tile *tile, u32 irqregs)
+{
+	struct xe_gt *mmio = tile->primary_gt;
+
+	xe_mmio_write32(mmio, IMR(irqregs), ~0);
+	/* Posting read */
+	xe_mmio_read32(mmio, IMR(irqregs));
+
+	xe_mmio_write32(mmio, IER(irqregs), 0);
+
+	/* IIR can theoretically queue up two events. Be paranoid. */
+	xe_mmio_write32(mmio, IIR(irqregs), ~0);
+	xe_mmio_read32(mmio, IIR(irqregs));
+	xe_mmio_write32(mmio, IIR(irqregs), ~0);
+	xe_mmio_read32(mmio, IIR(irqregs));
+}
+
+static u32 xelp_intr_disable(struct xe_device *xe)
+{
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+
+	xe_mmio_write32(mmio, GFX_MSTR_IRQ, 0);
+
+	/*
+	 * Now with master disabled, get a sample of level indications
+	 * for this interrupt. Indications will be cleared on related acks.
+	 * New indications can and will light up during processing,
+	 * and will generate new interrupt after enabling master.
+	 */
+	return xe_mmio_read32(mmio, GFX_MSTR_IRQ);
+}
+
+static u32
+gu_misc_irq_ack(struct xe_device *xe, const u32 master_ctl)
+{
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+	u32 iir;
+
+	if (!(master_ctl & GU_MISC_IRQ))
+		return 0;
+
+	iir = xe_mmio_read32(mmio, IIR(GU_MISC_IRQ_OFFSET));
+	if (likely(iir))
+		xe_mmio_write32(mmio, IIR(GU_MISC_IRQ_OFFSET), iir);
+
+	return iir;
+}
+
+static inline void xelp_intr_enable(struct xe_device *xe, bool stall)
+{
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+
+	xe_mmio_write32(mmio, GFX_MSTR_IRQ, MASTER_IRQ);
+	if (stall)
+		xe_mmio_read32(mmio, GFX_MSTR_IRQ);
+}
+
+/* Enable/unmask the HWE interrupts for a specific GT's engines. */
+void xe_irq_enable_hwe(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 ccs_mask, bcs_mask;
+	u32 irqs, dmask, smask;
+	u32 gsc_mask = 0;
+
+	if (xe_device_uc_enabled(xe)) {
+		irqs = GT_RENDER_USER_INTERRUPT |
+			GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
+	} else {
+		irqs = GT_RENDER_USER_INTERRUPT |
+		       GT_CS_MASTER_ERROR_INTERRUPT |
+		       GT_CONTEXT_SWITCH_INTERRUPT |
+		       GT_WAIT_SEMAPHORE_INTERRUPT;
+	}
+
+	ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
+	bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
+
+	dmask = irqs << 16 | irqs;
+	smask = irqs << 16;
+
+	if (!xe_gt_is_media_type(gt)) {
+		/* Enable interrupts for each engine class */
+		xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask);
+		if (ccs_mask)
+			xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, smask);
+
+		/* Unmask interrupts for each engine instance */
+		xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~smask);
+		xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~smask);
+		if (bcs_mask & (BIT(1)|BIT(2)))
+			xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
+		if (bcs_mask & (BIT(3)|BIT(4)))
+			xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
+		if (bcs_mask & (BIT(5)|BIT(6)))
+			xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
+		if (bcs_mask & (BIT(7)|BIT(8)))
+			xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
+		if (ccs_mask & (BIT(0)|BIT(1)))
+			xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask);
+		if (ccs_mask & (BIT(2)|BIT(3)))
+			xe_mmio_write32(gt,  CCS2_CCS3_INTR_MASK, ~dmask);
+	}
+
+	if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) {
+		/* Enable interrupts for each engine class */
+		xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, dmask);
+
+		/* Unmask interrupts for each engine instance */
+		xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask);
+		xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask);
+		xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask);
+
+		if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER))
+			gsc_mask = irqs;
+		else if (HAS_HECI_GSCFI(xe))
+			gsc_mask = GSC_IRQ_INTF(1);
+		if (gsc_mask) {
+			xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask);
+			xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~gsc_mask);
+		}
+	}
+}
+
+static u32
+gt_engine_identity(struct xe_device *xe,
+		   struct xe_gt *mmio,
+		   const unsigned int bank,
+		   const unsigned int bit)
+{
+	u32 timeout_ts;
+	u32 ident;
+
+	lockdep_assert_held(&xe->irq.lock);
+
+	xe_mmio_write32(mmio, IIR_REG_SELECTOR(bank), BIT(bit));
+
+	/*
+	 * NB: Specs do not specify how long to spin wait,
+	 * so we do ~100us as an educated guess.
+	 */
+	timeout_ts = (local_clock() >> 10) + 100;
+	do {
+		ident = xe_mmio_read32(mmio, INTR_IDENTITY_REG(bank));
+	} while (!(ident & INTR_DATA_VALID) &&
+		 !time_after32(local_clock() >> 10, timeout_ts));
+
+	if (unlikely(!(ident & INTR_DATA_VALID))) {
+		drm_err(&xe->drm, "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n",
+			bank, bit, ident);
+		return 0;
+	}
+
+	xe_mmio_write32(mmio, INTR_IDENTITY_REG(bank), ident);
+
+	return ident;
+}
+
+#define   OTHER_MEDIA_GUC_INSTANCE           16
+
+static void
+gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
+{
+	if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt))
+		return xe_guc_irq_handler(&gt->uc.guc, iir);
+	if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt))
+		return xe_guc_irq_handler(&gt->uc.guc, iir);
+
+	if (instance != OTHER_GUC_INSTANCE &&
+	    instance != OTHER_MEDIA_GUC_INSTANCE) {
+		WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
+			  instance, iir);
+	}
+}
+
+static struct xe_gt *pick_engine_gt(struct xe_tile *tile,
+				    enum xe_engine_class class,
+				    unsigned int instance)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+
+	if (MEDIA_VER(xe) < 13)
+		return tile->primary_gt;
+
+	if (class == XE_ENGINE_CLASS_VIDEO_DECODE ||
+	    class == XE_ENGINE_CLASS_VIDEO_ENHANCE)
+		return tile->media_gt;
+
+	if (class == XE_ENGINE_CLASS_OTHER &&
+	    (instance == OTHER_MEDIA_GUC_INSTANCE || instance == OTHER_GSC_INSTANCE))
+		return tile->media_gt;
+
+	return tile->primary_gt;
+}
+
+static void gt_irq_handler(struct xe_tile *tile,
+			   u32 master_ctl, unsigned long *intr_dw,
+			   u32 *identity)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_gt *mmio = tile->primary_gt;
+	unsigned int bank, bit;
+	u16 instance, intr_vec;
+	enum xe_engine_class class;
+	struct xe_hw_engine *hwe;
+
+	spin_lock(&xe->irq.lock);
+
+	for (bank = 0; bank < 2; bank++) {
+		if (!(master_ctl & GT_DW_IRQ(bank)))
+			continue;
+
+		intr_dw[bank] = xe_mmio_read32(mmio, GT_INTR_DW(bank));
+		for_each_set_bit(bit, intr_dw + bank, 32)
+			identity[bit] = gt_engine_identity(xe, mmio, bank, bit);
+		xe_mmio_write32(mmio, GT_INTR_DW(bank), intr_dw[bank]);
+
+		for_each_set_bit(bit, intr_dw + bank, 32) {
+			struct xe_gt *engine_gt;
+
+			class = INTR_ENGINE_CLASS(identity[bit]);
+			instance = INTR_ENGINE_INSTANCE(identity[bit]);
+			intr_vec = INTR_ENGINE_INTR(identity[bit]);
+
+			engine_gt = pick_engine_gt(tile, class, instance);
+
+			hwe = xe_gt_hw_engine(engine_gt, class, instance, false);
+			if (hwe) {
+				xe_hw_engine_handle_irq(hwe, intr_vec);
+				continue;
+			}
+
+			if (class == XE_ENGINE_CLASS_OTHER) {
+				/* HECI GSCFI interrupts come from outside of GT */
+				if (HAS_HECI_GSCFI(xe) && instance == OTHER_GSC_INSTANCE)
+					xe_heci_gsc_irq_handler(xe, intr_vec);
+				else
+					gt_other_irq_handler(engine_gt, instance, intr_vec);
+				continue;
+			}
+		}
+	}
+
+	spin_unlock(&xe->irq.lock);
+}
+
+/*
+ * Top-level interrupt handler for Xe_LP platforms (which did not have
+ * a "master tile" interrupt register.
+ */
+static irqreturn_t xelp_irq_handler(int irq, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct xe_tile *tile = xe_device_get_root_tile(xe);
+	u32 master_ctl, gu_misc_iir;
+	unsigned long intr_dw[2];
+	u32 identity[32];
+
+	spin_lock(&xe->irq.lock);
+	if (!xe->irq.enabled) {
+		spin_unlock(&xe->irq.lock);
+		return IRQ_NONE;
+	}
+	spin_unlock(&xe->irq.lock);
+
+	master_ctl = xelp_intr_disable(xe);
+	if (!master_ctl) {
+		xelp_intr_enable(xe, false);
+		return IRQ_NONE;
+	}
+
+	gt_irq_handler(tile, master_ctl, intr_dw, identity);
+
+	xe_display_irq_handler(xe, master_ctl);
+
+	gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
+
+	xelp_intr_enable(xe, false);
+
+	xe_display_irq_enable(xe, gu_misc_iir);
+
+	return IRQ_HANDLED;
+}
+
+static u32 dg1_intr_disable(struct xe_device *xe)
+{
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+	u32 val;
+
+	/* First disable interrupts */
+	xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, 0);
+
+	/* Get the indication levels and ack the master unit */
+	val = xe_mmio_read32(mmio, DG1_MSTR_TILE_INTR);
+	if (unlikely(!val))
+		return 0;
+
+	xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, val);
+
+	return val;
+}
+
+static void dg1_intr_enable(struct xe_device *xe, bool stall)
+{
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+
+	xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ);
+	if (stall)
+		xe_mmio_read32(mmio, DG1_MSTR_TILE_INTR);
+}
+
+/*
+ * Top-level interrupt handler for Xe_LP+ and beyond.  These platforms have
+ * a "master tile" interrupt register which must be consulted before the
+ * "graphics master" interrupt register.
+ */
+static irqreturn_t dg1_irq_handler(int irq, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct xe_tile *tile;
+	u32 master_tile_ctl, master_ctl = 0, gu_misc_iir = 0;
+	unsigned long intr_dw[2];
+	u32 identity[32];
+	u8 id;
+
+	/* TODO: This really shouldn't be copied+pasted */
+
+	spin_lock(&xe->irq.lock);
+	if (!xe->irq.enabled) {
+		spin_unlock(&xe->irq.lock);
+		return IRQ_NONE;
+	}
+	spin_unlock(&xe->irq.lock);
+
+	master_tile_ctl = dg1_intr_disable(xe);
+	if (!master_tile_ctl) {
+		dg1_intr_enable(xe, false);
+		return IRQ_NONE;
+	}
+
+	for_each_tile(tile, xe, id) {
+		struct xe_gt *mmio = tile->primary_gt;
+
+		if ((master_tile_ctl & DG1_MSTR_TILE(tile->id)) == 0)
+			continue;
+
+		master_ctl = xe_mmio_read32(mmio, GFX_MSTR_IRQ);
+
+		/*
+		 * We might be in irq handler just when PCIe DPC is initiated
+		 * and all MMIO reads will be returned with all 1's. Ignore this
+		 * irq as device is inaccessible.
+		 */
+		if (master_ctl == REG_GENMASK(31, 0)) {
+			dev_dbg(tile_to_xe(tile)->drm.dev,
+				"Ignore this IRQ as device might be in DPC containment.\n");
+			return IRQ_HANDLED;
+		}
+
+		xe_mmio_write32(mmio, GFX_MSTR_IRQ, master_ctl);
+
+		gt_irq_handler(tile, master_ctl, intr_dw, identity);
+
+		/*
+		 * Display interrupts (including display backlight operations
+		 * that get reported as Gunit GSE) would only be hooked up to
+		 * the primary tile.
+		 */
+		if (id == 0) {
+			xe_display_irq_handler(xe, master_ctl);
+			gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
+		}
+	}
+
+	dg1_intr_enable(xe, false);
+	xe_display_irq_enable(xe, gu_misc_iir);
+
+	return IRQ_HANDLED;
+}
+
+static void gt_irq_reset(struct xe_tile *tile)
+{
+	struct xe_gt *mmio = tile->primary_gt;
+
+	u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
+						   XE_ENGINE_CLASS_COMPUTE);
+	u32 bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
+						   XE_ENGINE_CLASS_COPY);
+
+	/* Disable RCS, BCS, VCS and VECS class engines. */
+	xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, 0);
+	xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, 0);
+	if (ccs_mask)
+		xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, 0);
+
+	/* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
+	xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK,	~0);
+	xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK,	~0);
+	if (bcs_mask & (BIT(1)|BIT(2)))
+		xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~0);
+	if (bcs_mask & (BIT(3)|BIT(4)))
+		xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~0);
+	if (bcs_mask & (BIT(5)|BIT(6)))
+		xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~0);
+	if (bcs_mask & (BIT(7)|BIT(8)))
+		xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~0);
+	xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK,	~0);
+	xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK,	~0);
+	xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK,	~0);
+	if (ccs_mask & (BIT(0)|BIT(1)))
+		xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~0);
+	if (ccs_mask & (BIT(2)|BIT(3)))
+		xe_mmio_write32(mmio,  CCS2_CCS3_INTR_MASK, ~0);
+
+	if ((tile->media_gt &&
+	     xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) ||
+	    HAS_HECI_GSCFI(tile_to_xe(tile))) {
+		xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0);
+		xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0);
+	}
+
+	xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0);
+	xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_MASK,  ~0);
+	xe_mmio_write32(mmio, GUC_SG_INTR_ENABLE,	 0);
+	xe_mmio_write32(mmio, GUC_SG_INTR_MASK,		~0);
+}
+
+static void xelp_irq_reset(struct xe_tile *tile)
+{
+	xelp_intr_disable(tile_to_xe(tile));
+
+	gt_irq_reset(tile);
+
+	mask_and_disable(tile, PCU_IRQ_OFFSET);
+}
+
+static void dg1_irq_reset(struct xe_tile *tile)
+{
+	if (tile->id == 0)
+		dg1_intr_disable(tile_to_xe(tile));
+
+	gt_irq_reset(tile);
+
+	mask_and_disable(tile, PCU_IRQ_OFFSET);
+}
+
+static void dg1_irq_reset_mstr(struct xe_tile *tile)
+{
+	struct xe_gt *mmio = tile->primary_gt;
+
+	xe_mmio_write32(mmio, GFX_MSTR_IRQ, ~0);
+}
+
+static void xe_irq_reset(struct xe_device *xe)
+{
+	struct xe_tile *tile;
+	u8 id;
+
+	for_each_tile(tile, xe, id) {
+		if (GRAPHICS_VERx100(xe) >= 1210)
+			dg1_irq_reset(tile);
+		else
+			xelp_irq_reset(tile);
+	}
+
+	tile = xe_device_get_root_tile(xe);
+	mask_and_disable(tile, GU_MISC_IRQ_OFFSET);
+	xe_display_irq_reset(xe);
+
+	/*
+	 * The tile's top-level status register should be the last one
+	 * to be reset to avoid possible bit re-latching from lower
+	 * level interrupts.
+	 */
+	if (GRAPHICS_VERx100(xe) >= 1210) {
+		for_each_tile(tile, xe, id)
+			dg1_irq_reset_mstr(tile);
+	}
+}
+
+static void xe_irq_postinstall(struct xe_device *xe)
+{
+	xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe));
+
+	/*
+	 * ASLE backlight operations are reported via GUnit GSE interrupts
+	 * on the root tile.
+	 */
+	unmask_and_enable(xe_device_get_root_tile(xe),
+			  GU_MISC_IRQ_OFFSET, GU_MISC_GSE);
+
+	/* Enable top-level interrupts */
+	if (GRAPHICS_VERx100(xe) >= 1210)
+		dg1_intr_enable(xe, true);
+	else
+		xelp_intr_enable(xe, true);
+}
+
+static irq_handler_t xe_irq_handler(struct xe_device *xe)
+{
+	if (GRAPHICS_VERx100(xe) >= 1210)
+		return dg1_irq_handler;
+	else
+		return xelp_irq_handler;
+}
+
+static void irq_uninstall(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	int irq;
+
+	if (!xe->irq.enabled)
+		return;
+
+	xe->irq.enabled = false;
+	xe_irq_reset(xe);
+
+	irq = pci_irq_vector(pdev, 0);
+	free_irq(irq, xe);
+}
+
+int xe_irq_install(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	irq_handler_t irq_handler;
+	int err, irq;
+
+	irq_handler = xe_irq_handler(xe);
+	if (!irq_handler) {
+		drm_err(&xe->drm, "No supported interrupt handler");
+		return -EINVAL;
+	}
+
+	xe_irq_reset(xe);
+
+	err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX);
+	if (err < 0) {
+		drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err);
+		return err;
+	}
+
+	irq = pci_irq_vector(pdev, 0);
+	err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe);
+	if (err < 0) {
+		drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err);
+		return err;
+	}
+
+	xe->irq.enabled = true;
+
+	xe_irq_postinstall(xe);
+
+	err = drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe);
+	if (err)
+		goto free_irq_handler;
+
+	return 0;
+
+free_irq_handler:
+	free_irq(irq, xe);
+
+	return err;
+}
+
+void xe_irq_shutdown(struct xe_device *xe)
+{
+	irq_uninstall(&xe->drm, xe);
+}
+
+void xe_irq_suspend(struct xe_device *xe)
+{
+	int irq = to_pci_dev(xe->drm.dev)->irq;
+
+	spin_lock_irq(&xe->irq.lock);
+	xe->irq.enabled = false; /* no new irqs */
+	spin_unlock_irq(&xe->irq.lock);
+
+	synchronize_irq(irq); /* flush irqs */
+	xe_irq_reset(xe); /* turn irqs off */
+}
+
+void xe_irq_resume(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	int id;
+
+	/*
+	 * lock not needed:
+	 * 1. no irq will arrive before the postinstall
+	 * 2. display is not yet resumed
+	 */
+	xe->irq.enabled = true;
+	xe_irq_reset(xe);
+	xe_irq_postinstall(xe); /* turn irqs on */
+
+	for_each_gt(gt, xe, id)
+		xe_irq_enable_hwe(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h
new file mode 100644
index 000000000000..bc42bc90d967
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_irq.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_IRQ_H_
+#define _XE_IRQ_H_
+
+struct xe_device;
+struct xe_tile;
+struct xe_gt;
+
+int xe_irq_install(struct xe_device *xe);
+void xe_irq_shutdown(struct xe_device *xe);
+void xe_irq_suspend(struct xe_device *xe);
+void xe_irq_resume(struct xe_device *xe);
+void xe_irq_enable_hwe(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
new file mode 100644
index 000000000000..0d7c5514e092
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/align.h>
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_sriov_regs.h"
+
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_lmtt.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_res_cursor.h"
+#include "xe_sriov.h"
+#include "xe_sriov_printk.h"
+
+/**
+ * DOC: Local Memory Translation Table
+ *
+ * The Local Memory Translation Table (LMTT) provides additional abstraction
+ * when Virtual Function (VF) is accessing device Local Memory (VRAM).
+ *
+ * The Root LMTT Page Directory contains one entry for each VF. Entries are
+ * indexed by the function number (1-based, index 0 is unused).
+ *
+ * See `Two-Level LMTT Structure`_ and `Multi-Level LMTT Structure`_.
+ */
+
+#define lmtt_assert(lmtt, condition)	xe_tile_assert(lmtt_to_tile(lmtt), condition)
+#define lmtt_debug(lmtt, msg...)	xe_sriov_dbg_verbose(lmtt_to_xe(lmtt), "LMTT: " msg)
+
+static bool xe_has_multi_level_lmtt(struct xe_device *xe)
+{
+	return xe->info.platform == XE_PVC;
+}
+
+static struct xe_tile *lmtt_to_tile(struct xe_lmtt *lmtt)
+{
+	return container_of(lmtt, struct xe_tile, sriov.pf.lmtt);
+}
+
+static struct xe_device *lmtt_to_xe(struct xe_lmtt *lmtt)
+{
+	return tile_to_xe(lmtt_to_tile(lmtt));
+}
+
+static u64 lmtt_page_size(struct xe_lmtt *lmtt)
+{
+	return BIT_ULL(lmtt->ops->lmtt_pte_shift(0));
+}
+
+static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level)
+{
+	unsigned int num_entries = level ? lmtt->ops->lmtt_pte_num(level) : 0;
+	struct xe_lmtt_pt *pt;
+	struct xe_bo *bo;
+	int err;
+
+	pt = kzalloc(struct_size(pt, entries, num_entries), GFP_KERNEL);
+	if (!pt) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	bo = xe_bo_create_pin_map(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), NULL,
+				  PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+					     lmtt->ops->lmtt_pte_num(level)),
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
+				  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		goto out_free_pt;
+	}
+
+	lmtt_assert(lmtt, xe_bo_is_vram(bo));
+
+	pt->level = level;
+	pt->bo = bo;
+	return pt;
+
+out_free_pt:
+	kfree(pt);
+out:
+	return ERR_PTR(err);
+}
+
+static void lmtt_pt_free(struct xe_lmtt_pt *pt)
+{
+	xe_bo_unpin_map_no_vm(pt->bo);
+	kfree(pt);
+}
+
+static int lmtt_init_pd(struct xe_lmtt *lmtt)
+{
+	struct xe_lmtt_pt *pd;
+
+	lmtt_assert(lmtt, !lmtt->pd);
+	lmtt_assert(lmtt, lmtt->ops->lmtt_root_pd_level());
+
+	pd = lmtt_pt_alloc(lmtt, lmtt->ops->lmtt_root_pd_level());
+	if (IS_ERR(pd))
+		return PTR_ERR(pd);
+
+	lmtt->pd = pd;
+	return 0;
+}
+
+static void lmtt_fini_pd(struct xe_lmtt *lmtt)
+{
+	struct xe_lmtt_pt *pd = lmtt->pd;
+	unsigned int num_entries = lmtt->ops->lmtt_pte_num(pd->level);
+	unsigned int n = 0;
+
+	/* make sure we don't leak */
+	for (n = 0; n < num_entries; n++)
+		lmtt_assert(lmtt, !pd->entries[n]);
+
+	lmtt->pd = NULL;
+	lmtt_pt_free(pd);
+}
+
+static void fini_lmtt(struct drm_device *drm, void *arg)
+{
+	struct xe_lmtt *lmtt = arg;
+
+	lmtt_assert(lmtt, !(!!lmtt->ops ^ !!lmtt->pd));
+
+	if (!lmtt->pd)
+		return;
+
+	lmtt_fini_pd(lmtt);
+	lmtt->ops = NULL;
+}
+
+/**
+ * xe_lmtt_init - LMTT software initialization.
+ * @lmtt: the &xe_lmtt to initialize
+ *
+ * The LMTT initialization requires two steps.
+ *
+ * The xe_lmtt_init() checks if LMTT is required on current device and selects
+ * and initialize proper variant of the LMTT Root Directory. Currently supported
+ * variants are `Two-Level LMTT Structure`_ and `Multi-Level LMTT Structure`_.
+ *
+ * In next step xe_lmtt_init_hw() will register this directory on the hardware.
+ *
+ * Notes:
+ * The LMTT allocations are managed and will be implicitly released on driver unload.
+ * This function shall be called only once and only when running as a PF driver.
+ * Any LMTT initialization failure should block VFs enabling.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_lmtt_init(struct xe_lmtt *lmtt)
+{
+	struct xe_device *xe = lmtt_to_xe(lmtt);
+	int err;
+
+	lmtt_assert(lmtt, IS_SRIOV_PF(xe));
+	lmtt_assert(lmtt, !lmtt->ops);
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	if (xe_has_multi_level_lmtt(xe))
+		lmtt->ops = &lmtt_ml_ops;
+	else
+		lmtt->ops = &lmtt_2l_ops;
+
+	err = lmtt_init_pd(lmtt);
+	if (unlikely(err))
+		goto fail;
+
+	return drmm_add_action_or_reset(&xe->drm, fini_lmtt, lmtt);
+
+fail:
+	lmtt->ops = NULL;
+	return err;
+}
+
+static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt)
+{
+	struct xe_tile *tile = lmtt_to_tile(lmtt);
+	struct xe_device *xe = tile_to_xe(tile);
+	dma_addr_t offset = xe_bo_main_addr(lmtt->pd->bo, XE_PAGE_SIZE);
+
+	lmtt_debug(lmtt, "DIR offset %pad\n", &offset);
+	lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo));
+	lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K));
+
+	xe_mmio_write32(tile->primary_gt,
+			GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG,
+			LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K));
+}
+
+/**
+ * xe_lmtt_init_hw - Perform LMTT hardware initialization.
+ * @lmtt: the &xe_lmtt to initialize
+ *
+ * This function is a second step of the LMTT initialization.
+ * This function registers LMTT Root Directory prepared in xe_lmtt_init().
+ *
+ * This function shall be called after every hardware reset.
+ * This function shall be called only when running as a PF driver.
+ */
+void xe_lmtt_init_hw(struct xe_lmtt *lmtt)
+{
+	if (!lmtt->pd)
+		return;
+
+	lmtt_setup_dir_ptr(lmtt);
+}
+
+static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt,
+			   u64 pte, unsigned int idx)
+{
+	unsigned int level = pt->level;
+
+	lmtt_assert(lmtt, idx <= lmtt->ops->lmtt_pte_num(level));
+	lmtt_debug(lmtt, "WRITE level=%u index=%u pte=%#llx\n", level, idx, pte);
+
+	switch (lmtt->ops->lmtt_pte_size(level)) {
+	case sizeof(u32):
+		xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte);
+		break;
+	case sizeof(u64):
+		xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte);
+		break;
+	default:
+		lmtt_assert(lmtt, !!!"invalid pte size");
+	}
+}
+
+static void lmtt_destroy_pt(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pd)
+{
+	unsigned int num_entries = pd->level ? lmtt->ops->lmtt_pte_num(pd->level) : 0;
+	struct xe_lmtt_pt *pt;
+	unsigned int i;
+
+	for (i = 0; i < num_entries; i++) {
+		pt = pd->entries[i];
+		pd->entries[i] = NULL;
+		if (!pt)
+			continue;
+
+		lmtt_destroy_pt(lmtt, pt);
+	}
+
+	lmtt_pt_free(pd);
+}
+
+static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid)
+{
+	struct xe_lmtt_pt *pd = lmtt->pd;
+	struct xe_lmtt_pt *pt;
+
+	pt = pd->entries[vfid];
+	pd->entries[vfid] = NULL;
+	if (!pt)
+		return;
+
+	lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid);
+
+	lmtt_assert(lmtt, pd->level > 0);
+	lmtt_assert(lmtt, pt->level == pd->level - 1);
+	lmtt_destroy_pt(lmtt, pt);
+}
+
+static int __lmtt_alloc_range(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pd,
+			      u64 start, u64 end)
+{
+	u64 pte_addr_shift = BIT_ULL(lmtt->ops->lmtt_pte_shift(pd->level));
+	u64 offset;
+	int err;
+
+	lmtt_assert(lmtt, pd->level > 0);
+
+	offset = start;
+	while (offset < end) {
+		struct xe_lmtt_pt *pt;
+		u64 next, pde, pt_addr;
+		unsigned int idx;
+
+		pt = lmtt_pt_alloc(lmtt, pd->level - 1);
+		if (IS_ERR(pt))
+			return PTR_ERR(pt);
+
+		pt_addr = xe_bo_main_addr(pt->bo, XE_PAGE_SIZE);
+
+		idx = lmtt->ops->lmtt_pte_index(offset, pd->level);
+		pde = lmtt->ops->lmtt_pte_encode(pt_addr, pd->level);
+
+		lmtt_write_pte(lmtt, pd, pde, idx);
+
+		pd->entries[idx] = pt;
+
+		next = min(end, round_up(offset + 1, pte_addr_shift));
+
+		if (pt->level != 0) {
+			err = __lmtt_alloc_range(lmtt, pt, offset, next);
+			if (err)
+				return err;
+		}
+
+		offset = next;
+	}
+
+	return 0;
+}
+
+static int lmtt_alloc_range(struct xe_lmtt *lmtt, unsigned int vfid, u64 start, u64 end)
+{
+	struct xe_lmtt_pt *pd = lmtt->pd;
+	struct xe_lmtt_pt *pt;
+	u64 pt_addr;
+	u64 pde;
+	int err;
+
+	lmtt_assert(lmtt, pd->level > 0);
+	lmtt_assert(lmtt, vfid <= lmtt->ops->lmtt_pte_num(pd->level));
+	lmtt_assert(lmtt, IS_ALIGNED(start, lmtt_page_size(lmtt)));
+	lmtt_assert(lmtt, IS_ALIGNED(end, lmtt_page_size(lmtt)));
+
+	if (pd->entries[vfid])
+		return -ENOTEMPTY;
+
+	pt = lmtt_pt_alloc(lmtt, pd->level - 1);
+	if (IS_ERR(pt))
+		return PTR_ERR(pt);
+
+	pt_addr = xe_bo_main_addr(pt->bo, XE_PAGE_SIZE);
+
+	pde = lmtt->ops->lmtt_pte_encode(pt_addr, pd->level);
+
+	lmtt_write_pte(lmtt, pd, pde, vfid);
+
+	pd->entries[vfid] = pt;
+
+	if (pt->level != 0) {
+		err = __lmtt_alloc_range(lmtt, pt, start, end);
+		if (err)
+			goto out_free_pt;
+	}
+
+	return 0;
+
+out_free_pt:
+	lmtt_pt_free(pt);
+	return err;
+}
+
+static struct xe_lmtt_pt *lmtt_leaf_pt(struct xe_lmtt *lmtt, unsigned int vfid, u64 addr)
+{
+	struct xe_lmtt_pt *pd = lmtt->pd;
+	struct xe_lmtt_pt *pt;
+
+	lmtt_assert(lmtt, vfid <= lmtt->ops->lmtt_pte_num(pd->level));
+	pt = pd->entries[vfid];
+
+	while (pt->level) {
+		lmtt_assert(lmtt, lmtt->ops->lmtt_pte_index(addr, pt->level) <=
+			    lmtt->ops->lmtt_pte_num(pt->level));
+
+		pt = pt->entries[lmtt->ops->lmtt_pte_index(addr, pt->level)];
+
+		addr >>= lmtt->ops->lmtt_pte_shift(pt->level);
+	}
+
+	lmtt_assert(lmtt, lmtt->ops->lmtt_pte_index(addr, pt->level) <=
+		    lmtt->ops->lmtt_pte_num(pt->level));
+	lmtt_assert(lmtt, pt->level != pd->level);
+	lmtt_assert(lmtt, pt->level == 0);
+	return pt;
+}
+
+static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 start)
+{
+	u64 page_size = lmtt_page_size(lmtt);
+	struct xe_res_cursor cur;
+	struct xe_lmtt_pt *pt;
+	u64 addr, vram_offset;
+
+	lmtt_assert(lmtt, IS_ALIGNED(start, page_size));
+	lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size));
+	lmtt_assert(lmtt, xe_bo_is_vram(bo));
+
+	vram_offset = vram_region_gpu_offset(bo->ttm.resource);
+	xe_res_first(bo->ttm.resource, 0, bo->size, &cur);
+	while (cur.remaining) {
+		addr = xe_res_dma(&cur);
+		addr += vram_offset; /* XXX */
+
+		pt = lmtt_leaf_pt(lmtt, vfid, start);
+
+		lmtt_write_pte(lmtt, pt, lmtt->ops->lmtt_pte_encode(addr, 0),
+					 lmtt->ops->lmtt_pte_index(start, 0));
+
+		xe_res_next(&cur, page_size);
+		start += page_size;
+	}
+}
+
+/**
+ * xe_lmtt_prepare_pages - Create VF's LMTT Page Tables.
+ * @lmtt: the &xe_lmtt to update
+ * @vfid: the VF identifier (1-based)
+ * @range: top range of LMEM offset to be supported
+ *
+ * This function creates empty LMTT page tables for given VF to support
+ * up to maximum #range LMEM offset. The LMTT page tables created by this
+ * function must be released using xe_lmtt_drop_pages() function.
+ *
+ * Notes:
+ * This function shall be called only after successful LMTT initialization.
+ * See xe_lmtt_init().
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range)
+{
+	lmtt_assert(lmtt, lmtt->pd);
+	lmtt_assert(lmtt, vfid);
+
+	return lmtt_alloc_range(lmtt, vfid, 0, range);
+}
+
+/**
+ * xe_lmtt_populate_pages - Update VF's LMTT Page Table Entries.
+ * @lmtt: the &xe_lmtt to update
+ * @vfid: the VF identifier (1-based)
+ * @bo: the buffer object with LMEM allocation to be mapped
+ * @offset: the offset at which #bo should be mapped
+ *
+ * This function updates VF's LMTT entries to use given buffer object as a backstore.
+ *
+ * Notes:
+ * This function shall be called only after successful preparation of the
+ * VF's LMTT Page Tables. See xe_lmtt_prepare().
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset)
+{
+	lmtt_assert(lmtt, lmtt->pd);
+	lmtt_assert(lmtt, vfid);
+
+	lmtt_insert_bo(lmtt, vfid, bo, offset);
+	return 0;
+}
+
+/**
+ * xe_lmtt_drop_pages - Remove VF's LMTT Pages.
+ * @lmtt: the &xe_lmtt to update
+ * @vfid: the VF identifier (1-based)
+ *
+ * This function removes all LMTT Page Tables prepared by xe_lmtt_prepare_pages().
+ *
+ * This function shall be called only after successful LMTT initialization.
+ * See xe_lmtt_init().
+ */
+void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid)
+{
+	lmtt_assert(lmtt, lmtt->pd);
+	lmtt_assert(lmtt, vfid);
+
+	lmtt_drop_pages(lmtt, vfid);
+}
+
+/**
+ * xe_lmtt_estimate_pt_size - Estimate size of LMTT PT allocations.
+ * @lmtt: the &xe_lmtt
+ * @size: the size of the LMEM to be mapped over LMTT (including any offset)
+ *
+ * This function shall be called only by PF.
+ *
+ * Return: size of the PT allocation(s) needed to support given LMEM size.
+ */
+u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size)
+{
+	unsigned int level = 0;
+	u64 pt_size;
+
+	lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt)));
+	lmtt_assert(lmtt, IS_DGFX(lmtt_to_xe(lmtt)));
+	lmtt_assert(lmtt, lmtt->ops);
+
+	pt_size = PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+			     lmtt->ops->lmtt_pte_num(level));
+
+	while (++level < lmtt->ops->lmtt_root_pd_level()) {
+		pt_size *= lmtt->ops->lmtt_pte_index(size, level) + 1;
+		pt_size += PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+				      lmtt->ops->lmtt_pte_num(level));
+	}
+
+	return pt_size;
+}
+
+#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_lmtt_test.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h
new file mode 100644
index 000000000000..cb10ef994db6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_LMTT_H_
+#define _XE_LMTT_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+struct xe_lmtt;
+struct xe_lmtt_ops;
+
+#ifdef CONFIG_PCI_IOV
+int xe_lmtt_init(struct xe_lmtt *lmtt);
+void xe_lmtt_init_hw(struct xe_lmtt *lmtt);
+int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range);
+int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset);
+void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid);
+u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size);
+#else
+static inline int xe_lmtt_init(struct xe_lmtt *lmtt) { return 0; }
+static inline void xe_lmtt_init_hw(struct xe_lmtt *lmtt) { }
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lmtt_2l.c b/drivers/gpu/drm/xe/xe_lmtt_2l.c
new file mode 100644
index 000000000000..84bc5c4212b5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt_2l.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/align.h>
+#include <linux/bitfield.h>
+#include <linux/log2.h>
+#include <linux/sizes.h>
+
+#include "xe_lmtt_types.h"
+#include "xe_macros.h"
+
+/**
+ * DOC: Two-Level LMTT Structure
+ *
+ * LMHAW (Local Memory Host Address Width) is 37 bit (128GB)
+ *
+ * LMGAW (Local Memory Guest Address Width) is 37 bit (128GB)
+ *
+ * The following figure illustrates the structure and function of the 2L LMTT::
+ *
+ *            LMTT Directory
+ *           (1 Entry per VF)
+ *            +-----------+                     LMTT (per VF)
+ *            |           |                     +-----------+
+ *            |           |                     |           |
+ *            |           |          index:     |           |
+ *            |           |          LMEM VF    +===========+
+ *            |           |          offset --> |    PTE    | ==> LMEM PF offset
+ *            |           |                     +===========+
+ *   index:   +===========+                     |           |
+ *   VFID --> |    PDE    |  -----------------> +-----------+
+ *            +===========+                    /              \.
+ *            |           |                   /                 \.
+ *            |           |                  /                    \.
+ *            |           |                 /                       \.
+ *            +-----------+ <== [LMTT Directory Ptr]                  \.
+ *           /             \              /                             \.
+ *          /               \         +-----------+-----------------+------+---+
+ *         /                 \        | 31:HAW-16 |        HAW-17:5 |  4:1 | 0 |
+ *        /                   \       +===========+=================+======+===+
+ *       /                     \      |  Reserved | LMEM Page (2MB) | Rsvd | V |
+ *      /                       \     +-----------+-----------------+------+---+
+ *     /                         \.
+ *   +-----------+-----------------+------+---+
+ *   | 31:HAW-12 |        HAW-13:4 |  3:1 | 0 |
+ *   +===========+=================+======+===+
+ *   |  Reserved | LMTT Ptr (64KB) | Rsvd | V |
+ *   +-----------+-----------------+------+---+
+ *
+ */
+
+typedef u32 lmtt_2l_pde_t;
+typedef u32 lmtt_2l_pte_t;
+
+#if IS_ENABLED(CONFIG_DRM_XE_LMTT_2L_128GB)
+#define LMTT_2L_HAW			37 /* 128 GiB */
+#else
+#define LMTT_2L_HAW			35 /* 32 GiB */
+#endif
+
+#define LMTT_2L_PDE_MAX_NUM		64 /* SRIOV with PF and 63 VFs, index 0 (PF) is unused */
+#define LMTT_2L_PDE_LMTT_PTR		GENMASK(LMTT_2L_HAW - 13, 4)
+#define LMTT_2L_PDE_VALID		BIT(0)
+
+#define LMTT_2L_PTE_MAX_NUM		BIT(LMTT_2L_HAW - ilog2(SZ_2M))
+#define LMTT_2L_PTE_LMEM_PAGE		GENMASK(LMTT_2L_HAW - 17, 5)
+#define LMTT_2L_PTE_VALID		BIT(0)
+
+static unsigned int lmtt_2l_root_pd_level(void)
+{
+	return 1; /* implementation is 0-based */
+}
+
+static unsigned int lmtt_2l_pte_num(unsigned int level)
+{
+	switch (level) {
+	case 1:
+		return LMTT_2L_PDE_MAX_NUM;
+	case 0:
+		BUILD_BUG_ON(LMTT_2L_HAW == 37 && LMTT_2L_PTE_MAX_NUM != SZ_64K);
+		BUILD_BUG_ON(LMTT_2L_HAW == 35 && LMTT_2L_PTE_MAX_NUM != SZ_16K);
+		return LMTT_2L_PTE_MAX_NUM;
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_2l_pte_size(unsigned int level)
+{
+	switch (level) {
+	case 1:
+		return sizeof(lmtt_2l_pde_t);
+	case 0:
+		return sizeof(lmtt_2l_pte_t);
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_2l_pte_shift(unsigned int level)
+{
+	switch (level) {
+	case 0:
+		return ilog2(SZ_2M);
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_2l_pte_index(u64 addr, unsigned int level)
+{
+	addr >>= lmtt_2l_pte_shift(level);
+
+	switch (level) {
+	case 0:
+		/* SZ_2M increments */
+		BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_2L_PTE_MAX_NUM);
+		return addr & (LMTT_2L_PTE_MAX_NUM - 1);
+	default:
+		return 0;
+	}
+}
+
+static u64 lmtt_2l_pte_encode(unsigned long offset, unsigned int level)
+{
+	switch (level) {
+	case 0:
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_2M));
+		XE_WARN_ON(!FIELD_FIT(LMTT_2L_PTE_LMEM_PAGE, offset / SZ_2M));
+		return FIELD_PREP(LMTT_2L_PTE_LMEM_PAGE, offset / SZ_2M) | LMTT_2L_PTE_VALID;
+	case 1:
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_64K));
+		XE_WARN_ON(!FIELD_FIT(LMTT_2L_PDE_LMTT_PTR, offset / SZ_64K));
+		return FIELD_PREP(LMTT_2L_PDE_LMTT_PTR, offset / SZ_64K) | LMTT_2L_PDE_VALID;
+	default:
+		XE_WARN_ON(true);
+		return 0;
+	}
+}
+
+const struct xe_lmtt_ops lmtt_2l_ops = {
+	.lmtt_root_pd_level = lmtt_2l_root_pd_level,
+	.lmtt_pte_num = lmtt_2l_pte_num,
+	.lmtt_pte_size = lmtt_2l_pte_size,
+	.lmtt_pte_shift = lmtt_2l_pte_shift,
+	.lmtt_pte_index = lmtt_2l_pte_index,
+	.lmtt_pte_encode = lmtt_2l_pte_encode,
+};
diff --git a/drivers/gpu/drm/xe/xe_lmtt_ml.c b/drivers/gpu/drm/xe/xe_lmtt_ml.c
new file mode 100644
index 000000000000..b21215a2edd6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt_ml.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/align.h>
+#include <linux/bitfield.h>
+#include <linux/log2.h>
+#include <linux/sizes.h>
+
+#include "xe_lmtt_types.h"
+#include "xe_macros.h"
+
+/**
+ * DOC: Multi-Level LMTT Structure
+ *
+ * LMHAW (Local Memory Host Address Width) is 48 bit (256TB)
+ *
+ * LMGAW (Local Memory Guest Address Width) is 48 bit (256TB)
+ *
+ * The following figure illustrates the structure and function of the ML LMTT::
+ *
+ *           LMTT L3 Directory
+ *           (1 Entry per VF)                                       LMTT L1 Leaf
+ *            +-----------+                                         +-----------+
+ *            |           |             LMTT L2 (per VF)            |           |
+ *            |           |              +-----------+              |           |
+ *            |           |              |           |     index:   +===========+
+ *            |           |              |           |     GDPA --> |    PTE    | => LMEM PF offset
+ *            |           |              |           |     34:21    +===========+
+ *            |           |    index:    |           |              |           |
+ *            |           |    LMEM VF   +===========+              |           |
+ *            |           |    offset -> |    PTE    |  ----------> +-----------+
+ *            |           |    GAW-1:35  +===========+              /           \.
+ *   index:   +===========+              |           |             /              \.
+ *   VFID --> |    PDE    |  --------->  +-----------+            /                 \.
+ *            +===========+             /           /            /                    \.
+ *            |           |           /            /            /                       \.
+ *            +-----------+  <== [LMTT Directory Ptr]          /                          \.
+ *           /             \      /              /            /                             \.
+ *          /                \  /               /       +-----------+-----------------+------+---+
+ *         /                  /\               /        | 31:HAW-16 |        HAW-17:5 |  4:1 | 0 |
+ *        /                 /    \            /         +===========+=================+======+===+
+ *       /                /        \         /          |  Reserved | LMEM Page (2MB) | Rsvd | V |
+ *      /                                   /           +-----------+-----------------+------+---+
+ *     /                                   /
+ *  +-----------+-----------------+------+---+
+ *  | 63:HAW-12 |        HAW-13:4 |  3:1 | 0 |
+ *  +===========+=================+======+===+
+ *  |  Reserved | LMTT Ptr (64KB) | Rsvd | V |
+ *  +-----------+-----------------+------+---+
+ *
+ */
+
+typedef u64 lmtt_ml_pde_t;
+typedef u32 lmtt_ml_pte_t;
+
+#define LMTT_ML_HAW			48 /* 256 TiB */
+
+#define LMTT_ML_PDE_MAX_NUM		64 /* SRIOV with PF and 63 VFs, index 0 (PF) is unused */
+#define LMTT_ML_PDE_LMTT_PTR		GENMASK_ULL(LMTT_ML_HAW - 13, 4)
+#define LMTT_ML_PDE_VALID		BIT(0)
+
+#define LMTT_ML_PDE_L2_SHIFT		35
+#define LMTT_ML_PDE_L2_MAX_NUM		BIT_ULL(LMTT_ML_HAW - 35)
+
+#define LMTT_ML_PTE_MAX_NUM		BIT(35 - ilog2(SZ_2M))
+#define LMTT_ML_PTE_LMEM_PAGE		GENMASK(LMTT_ML_HAW - 17, 5)
+#define LMTT_ML_PTE_VALID		BIT(0)
+
+static unsigned int lmtt_ml_root_pd_level(void)
+{
+	return 2; /* implementation is 0-based */
+}
+
+static unsigned int lmtt_ml_pte_num(unsigned int level)
+{
+	switch (level) {
+	case 2:
+		return LMTT_ML_PDE_MAX_NUM;
+	case 1:
+		BUILD_BUG_ON(LMTT_ML_HAW == 48 && LMTT_ML_PDE_L2_MAX_NUM != SZ_8K);
+		return LMTT_ML_PDE_L2_MAX_NUM;
+	case 0:
+		BUILD_BUG_ON(LMTT_ML_PTE_MAX_NUM != SZ_16K);
+		return LMTT_ML_PTE_MAX_NUM;
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_ml_pte_size(unsigned int level)
+{
+	switch (level) {
+	case 2:
+	case 1:
+		return sizeof(lmtt_ml_pde_t);
+	case 0:
+		return sizeof(lmtt_ml_pte_t);
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_ml_pte_shift(unsigned int level)
+{
+	switch (level) {
+	case 1:
+		BUILD_BUG_ON(BIT_ULL(LMTT_ML_PDE_L2_SHIFT) != SZ_32G);
+		return ilog2(SZ_32G);
+	case 0:
+		return ilog2(SZ_2M);
+	default:
+		return 0;
+	}
+}
+
+static unsigned int lmtt_ml_pte_index(u64 addr, unsigned int level)
+{
+	addr >>= lmtt_ml_pte_shift(level);
+
+	switch (level) {
+	case 1:
+		/* SZ_32G increments */
+		BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_ML_PDE_L2_MAX_NUM);
+		return addr & (LMTT_ML_PDE_L2_MAX_NUM - 1);
+	case 0:
+		/* SZ_2M increments */
+		BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_ML_PTE_MAX_NUM);
+		return addr & (LMTT_ML_PTE_MAX_NUM - 1);
+	default:
+		return 0;
+	}
+}
+
+static u64 lmtt_ml_pte_encode(unsigned long offset, unsigned int level)
+{
+	switch (level) {
+	case 0:
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_2M));
+		XE_WARN_ON(!FIELD_FIT(LMTT_ML_PTE_LMEM_PAGE, offset / SZ_2M));
+		return FIELD_PREP(LMTT_ML_PTE_LMEM_PAGE, offset / SZ_2M) | LMTT_ML_PTE_VALID;
+	case 1:
+	case 2:
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_64K));
+		XE_WARN_ON(!FIELD_FIT(LMTT_ML_PDE_LMTT_PTR, offset / SZ_64K));
+		return FIELD_PREP(LMTT_ML_PDE_LMTT_PTR, offset / SZ_64K) | LMTT_ML_PDE_VALID;
+	default:
+		XE_WARN_ON(true);
+		return 0;
+	}
+}
+
+const struct xe_lmtt_ops lmtt_ml_ops = {
+	.lmtt_root_pd_level = lmtt_ml_root_pd_level,
+	.lmtt_pte_num = lmtt_ml_pte_num,
+	.lmtt_pte_size = lmtt_ml_pte_size,
+	.lmtt_pte_shift = lmtt_ml_pte_shift,
+	.lmtt_pte_index = lmtt_ml_pte_index,
+	.lmtt_pte_encode = lmtt_ml_pte_encode,
+};
diff --git a/drivers/gpu/drm/xe/xe_lmtt_types.h b/drivers/gpu/drm/xe/xe_lmtt_types.h
new file mode 100644
index 000000000000..b37abad23416
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt_types.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_LMTT_TYPES_H_
+#define _XE_LMTT_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+struct xe_lmtt;
+struct xe_lmtt_pt;
+struct xe_lmtt_ops;
+
+#define LMTT_PTE_INVALID	ULL(0)
+
+/**
+ * struct xe_lmtt - Local Memory Translation Table Manager
+ */
+struct xe_lmtt {
+	/** @pd: root LMTT Directory */
+	struct xe_lmtt_pt *pd;
+
+	/** @ops: LMTT functions */
+	const struct xe_lmtt_ops *ops;
+};
+
+/**
+ * struct xe_lmtt_pt - Local Memory Translation Table Page Table
+ *
+ * Represents single level of the LMTT.
+ */
+struct xe_lmtt_pt {
+	/** @level: page table level, 0 is leaf */
+	unsigned int level;
+
+	/** @bo: buffer object with actual LMTT PTE values */
+	struct xe_bo *bo;
+
+	/** @entries: leaf page tables, exist only for root/non-leaf */
+	struct xe_lmtt_pt *entries[];
+};
+
+/**
+ * struct xe_lmtt_ops - Local Memory Translation Table Operations
+ *
+ * Provides abstraction of the LMTT variants.
+ */
+struct xe_lmtt_ops {
+	/* private: */
+	unsigned int (*lmtt_root_pd_level)(void);
+	unsigned int (*lmtt_pte_num)(unsigned int level);
+	unsigned int (*lmtt_pte_size)(unsigned int level);
+	unsigned int (*lmtt_pte_shift)(unsigned int level);
+	unsigned int (*lmtt_pte_index)(u64 addr, unsigned int level);
+	u64 (*lmtt_pte_encode)(unsigned long offset, unsigned int level);
+};
+
+extern const struct xe_lmtt_ops lmtt_2l_ops;
+extern const struct xe_lmtt_ops lmtt_ml_ops;
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
new file mode 100644
index 000000000000..b7fa3831b684
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -0,0 +1,1272 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_lrc.h"
+
+#include "instructions/xe_mi_commands.h"
+#include "instructions/xe_gfxpipe_commands.h"
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gpu_commands.h"
+#include "regs/xe_lrc_layout.h"
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_drm_client.h"
+#include "xe_exec_queue_types.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_hw_fence.h"
+#include "xe_map.h"
+#include "xe_vm.h"
+
+#define CTX_VALID				(1 << 0)
+#define CTX_PRIVILEGE				(1 << 8)
+#define CTX_ADDRESSING_MODE_SHIFT		3
+#define LEGACY_64B_CONTEXT			3
+
+#define ENGINE_CLASS_SHIFT			61
+#define ENGINE_INSTANCE_SHIFT			48
+
+static struct xe_device *
+lrc_to_xe(struct xe_lrc *lrc)
+{
+	return gt_to_xe(lrc->fence_ctx.gt);
+}
+
+size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
+{
+	switch (class) {
+	case XE_ENGINE_CLASS_RENDER:
+		if (GRAPHICS_VER(xe) >= 20)
+			return 4 * SZ_4K;
+		else
+			return 14 * SZ_4K;
+	case XE_ENGINE_CLASS_COMPUTE:
+		/* 14 pages since graphics_ver == 11 */
+		if (GRAPHICS_VER(xe) >= 20)
+			return 3 * SZ_4K;
+		else
+			return 14 * SZ_4K;
+	default:
+		WARN(1, "Unknown engine class: %d", class);
+		fallthrough;
+	case XE_ENGINE_CLASS_COPY:
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+	case XE_ENGINE_CLASS_OTHER:
+		return 2 * SZ_4K;
+	}
+}
+
+/*
+ * The per-platform tables are u8-encoded in @data. Decode @data and set the
+ * addresses' offset and commands in @regs. The following encoding is used
+ * for each byte. There are 2 steps: decoding commands and decoding addresses.
+ *
+ * Commands:
+ * [7]: create NOPs - number of NOPs are set in lower bits
+ * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
+ *      MI_LRI_FORCE_POSTED
+ * [5:0]: Number of NOPs or registers to set values to in case of
+ *        MI_LOAD_REGISTER_IMM
+ *
+ * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
+ * number of registers. They are set by using the REG/REG16 macros: the former
+ * is used for offsets smaller than 0x200 while the latter is for values bigger
+ * than that. Those macros already set all the bits documented below correctly:
+ *
+ * [7]: When a register offset needs more than 6 bits, use additional bytes, to
+ *      follow, for the lower bits
+ * [6:0]: Register offset, without considering the engine base.
+ *
+ * This function only tweaks the commands and register offsets. Values are not
+ * filled out.
+ */
+static void set_offsets(u32 *regs,
+			const u8 *data,
+			const struct xe_hw_engine *hwe)
+#define NOP(x) (BIT(7) | (x))
+#define LRI(count, flags) ((flags) << 6 | (count) | \
+			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
+#define POSTED BIT(0)
+#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
+#define REG16(x) \
+	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
+	(((x) >> 2) & 0x7f)
+#define END 0
+{
+	const u32 base = hwe->mmio_base;
+
+	while (*data) {
+		u8 count, flags;
+
+		if (*data & BIT(7)) { /* skip */
+			count = *data++ & ~BIT(7);
+			regs += count;
+			continue;
+		}
+
+		count = *data & 0x3f;
+		flags = *data >> 6;
+		data++;
+
+		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
+		if (flags & POSTED)
+			*regs |= MI_LRI_FORCE_POSTED;
+		*regs |= MI_LRI_LRM_CS_MMIO;
+		regs++;
+
+		xe_gt_assert(hwe->gt, count);
+		do {
+			u32 offset = 0;
+			u8 v;
+
+			do {
+				v = *data++;
+				offset <<= 7;
+				offset |= v & ~BIT(7);
+			} while (v & BIT(7));
+
+			regs[0] = base + (offset << 2);
+			regs += 2;
+		} while (--count);
+	}
+
+	*regs = MI_BATCH_BUFFER_END | BIT(0);
+}
+
+static const u8 gen12_xcs_offsets[] = {
+	NOP(1),
+	LRI(13, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	END
+};
+
+static const u8 dg2_xcs_offsets[] = {
+	NOP(1),
+	LRI(15, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+	REG(0x120),
+	REG(0x124),
+
+	NOP(1),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	END
+};
+
+static const u8 gen12_rcs_offsets[] = {
+	NOP(1),
+	LRI(13, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(3, POSTED),
+	REG(0x1b0),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+	NOP(3 + 9 + 1),
+
+	LRI(51, POSTED),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG16(0x588),
+	REG(0x028),
+	REG(0x09c),
+	REG(0x0c0),
+	REG(0x178),
+	REG(0x17c),
+	REG16(0x358),
+	REG(0x170),
+	REG(0x150),
+	REG(0x154),
+	REG(0x158),
+	REG16(0x41c),
+	REG16(0x600),
+	REG16(0x604),
+	REG16(0x608),
+	REG16(0x60c),
+	REG16(0x610),
+	REG16(0x614),
+	REG16(0x618),
+	REG16(0x61c),
+	REG16(0x620),
+	REG16(0x624),
+	REG16(0x628),
+	REG16(0x62c),
+	REG16(0x630),
+	REG16(0x634),
+	REG16(0x638),
+	REG16(0x63c),
+	REG16(0x640),
+	REG16(0x644),
+	REG16(0x648),
+	REG16(0x64c),
+	REG16(0x650),
+	REG16(0x654),
+	REG16(0x658),
+	REG16(0x65c),
+	REG16(0x660),
+	REG16(0x664),
+	REG16(0x668),
+	REG16(0x66c),
+	REG16(0x670),
+	REG16(0x674),
+	REG16(0x678),
+	REG16(0x67c),
+	REG(0x068),
+	REG(0x084),
+	NOP(1),
+
+	END
+};
+
+static const u8 xehp_rcs_offsets[] = {
+	NOP(1),
+	LRI(13, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+
+	NOP(5),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(3, POSTED),
+	REG(0x1b0),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+
+	END
+};
+
+static const u8 dg2_rcs_offsets[] = {
+	NOP(1),
+	LRI(15, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+	REG(0x120),
+	REG(0x124),
+
+	NOP(1),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	LRI(3, POSTED),
+	REG(0x1b0),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+
+	END
+};
+
+static const u8 mtl_rcs_offsets[] = {
+	NOP(1),
+	LRI(15, POSTED),
+	REG16(0x244),
+	REG(0x034),
+	REG(0x030),
+	REG(0x038),
+	REG(0x03c),
+	REG(0x168),
+	REG(0x140),
+	REG(0x110),
+	REG(0x1c0),
+	REG(0x1c4),
+	REG(0x1c8),
+	REG(0x180),
+	REG16(0x2b4),
+	REG(0x120),
+	REG(0x124),
+
+	NOP(1),
+	LRI(9, POSTED),
+	REG16(0x3a8),
+	REG16(0x28c),
+	REG16(0x288),
+	REG16(0x284),
+	REG16(0x280),
+	REG16(0x27c),
+	REG16(0x278),
+	REG16(0x274),
+	REG16(0x270),
+
+	NOP(2),
+	LRI(2, POSTED),
+	REG16(0x5a8),
+	REG16(0x5ac),
+
+	NOP(6),
+	LRI(1, 0),
+	REG(0x0c8),
+
+	END
+};
+
+#define XE2_CTX_COMMON \
+	NOP(1),                 /* [0x00] */ \
+	LRI(15, POSTED),        /* [0x01] */ \
+	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
+	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
+	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
+	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
+	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
+	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
+	REG(0x140),             /* [0x0e] BB_ADDR */ \
+	REG(0x110),             /* [0x10] BB_STATE */ \
+	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
+	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
+	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
+	REG(0x180),             /* [0x18] CCID */ \
+	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
+	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
+	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
+	\
+	NOP(1),                 /* [0x20] */ \
+	LRI(9, POSTED),         /* [0x21] */ \
+	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
+	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
+	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
+	REG16(0x284),           /* [0x28] dummy reg */ \
+	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
+	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
+	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
+	REG16(0x274),           /* [0x30] PTBP_UDW */ \
+	REG16(0x270)            /* [0x32] PTBP_LDW */
+
+static const u8 xe2_rcs_offsets[] = {
+	XE2_CTX_COMMON,
+
+	NOP(2),                 /* [0x34] */
+	LRI(2, POSTED),         /* [0x36] */
+	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
+	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */
+
+	NOP(6),                 /* [0x41] */
+	LRI(1, 0),              /* [0x47] */
+	REG(0x0c8),             /* [0x48] R_PWR_CLK_STATE */
+
+	END
+};
+
+static const u8 xe2_bcs_offsets[] = {
+	XE2_CTX_COMMON,
+
+	NOP(4 + 8 + 1),         /* [0x34] */
+	LRI(2, POSTED),         /* [0x41] */
+	REG16(0x200),           /* [0x42] BCS_SWCTRL */
+	REG16(0x204),           /* [0x44] BLIT_CCTL */
+
+	END
+};
+
+static const u8 xe2_xcs_offsets[] = {
+	XE2_CTX_COMMON,
+
+	END
+};
+
+#undef END
+#undef REG16
+#undef REG
+#undef LRI
+#undef NOP
+
+static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
+{
+	if (class == XE_ENGINE_CLASS_RENDER) {
+		if (GRAPHICS_VER(xe) >= 20)
+			return xe2_rcs_offsets;
+		else if (GRAPHICS_VERx100(xe) >= 1270)
+			return mtl_rcs_offsets;
+		else if (GRAPHICS_VERx100(xe) >= 1255)
+			return dg2_rcs_offsets;
+		else if (GRAPHICS_VERx100(xe) >= 1250)
+			return xehp_rcs_offsets;
+		else
+			return gen12_rcs_offsets;
+	} else if (class == XE_ENGINE_CLASS_COPY) {
+		if (GRAPHICS_VER(xe) >= 20)
+			return xe2_bcs_offsets;
+		else
+			return gen12_xcs_offsets;
+	} else {
+		if (GRAPHICS_VER(xe) >= 20)
+			return xe2_xcs_offsets;
+		else if (GRAPHICS_VERx100(xe) >= 1255)
+			return dg2_xcs_offsets;
+		else
+			return gen12_xcs_offsets;
+	}
+}
+
+static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
+{
+	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
+				    _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
+				    CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+
+	/* TODO: Timestamp */
+}
+
+static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
+{
+	struct xe_device *xe = gt_to_xe(hwe->gt);
+
+	if (GRAPHICS_VERx100(xe) >= 1250)
+		return 0x70;
+	else
+		return 0x60;
+}
+
+static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
+{
+	int x;
+
+	x = lrc_ring_mi_mode(hwe);
+	regs[x + 1] &= ~STOP_RING;
+	regs[x + 1] |= STOP_RING << 16;
+}
+
+static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
+{
+	return 0;
+}
+
+u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
+{
+	return lrc->ring.size;
+}
+
+/* Make the magic macros work */
+#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
+
+#define LRC_SEQNO_PPHWSP_OFFSET 512
+#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
+#define LRC_PARALLEL_PPHWSP_OFFSET 2048
+#define LRC_PPHWSP_SIZE SZ_4K
+
+static size_t lrc_reg_size(struct xe_device *xe)
+{
+	if (GRAPHICS_VERx100(xe) >= 1250)
+		return 96 * sizeof(u32);
+	else
+		return 80 * sizeof(u32);
+}
+
+size_t xe_lrc_skip_size(struct xe_device *xe)
+{
+	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
+}
+
+static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
+{
+	/* The seqno is stored in the driver-defined portion of PPHWSP */
+	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
+{
+	/* The start seqno is stored in the driver-defined portion of PPHWSP */
+	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
+{
+	/* The parallel is stored in the driver-defined portion of PPHWSP */
+	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
+{
+	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
+}
+
+#define DECL_MAP_ADDR_HELPERS(elem) \
+static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
+{ \
+	struct iosys_map map = lrc->bo->vmap; \
+\
+	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
+	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
+	return map; \
+} \
+static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
+{ \
+	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
+} \
+
+DECL_MAP_ADDR_HELPERS(ring)
+DECL_MAP_ADDR_HELPERS(pphwsp)
+DECL_MAP_ADDR_HELPERS(seqno)
+DECL_MAP_ADDR_HELPERS(regs)
+DECL_MAP_ADDR_HELPERS(start_seqno)
+DECL_MAP_ADDR_HELPERS(parallel)
+
+#undef DECL_MAP_ADDR_HELPERS
+
+u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_pphwsp_ggtt_addr(lrc);
+}
+
+u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
+{
+	struct xe_device *xe = lrc_to_xe(lrc);
+	struct iosys_map map;
+
+	map = __xe_lrc_regs_map(lrc);
+	iosys_map_incr(&map, reg_nr * sizeof(u32));
+	return xe_map_read32(xe, &map);
+}
+
+void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
+{
+	struct xe_device *xe = lrc_to_xe(lrc);
+	struct iosys_map map;
+
+	map = __xe_lrc_regs_map(lrc);
+	iosys_map_incr(&map, reg_nr * sizeof(u32));
+	xe_map_write32(xe, &map, val);
+}
+
+static void *empty_lrc_data(struct xe_hw_engine *hwe)
+{
+	struct xe_device *xe = gt_to_xe(hwe->gt);
+	void *data;
+	u32 *regs;
+
+	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
+	if (!data)
+		return NULL;
+
+	/* 1st page: Per-Process of HW status Page */
+	regs = data + LRC_PPHWSP_SIZE;
+	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
+	set_context_control(regs, hwe);
+	reset_stop_ring(regs, hwe);
+
+	return data;
+}
+
+static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
+{
+	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
+
+	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
+	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
+}
+
+#define PVC_CTX_ASID		(0x2e + 1)
+#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)
+#define ACC_GRANULARITY_S       20
+#define ACC_NOTIFY_S            16
+
+int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct iosys_map map;
+	void *init_data = NULL;
+	u32 arb_enable;
+	int err;
+
+	lrc->flags = 0;
+
+	/*
+	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
+	 * via VM bind calls.
+	 */
+	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
+				      ring_size + xe_lrc_size(xe, hwe->class),
+				      ttm_bo_type_kernel,
+				      XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				      XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(lrc->bo))
+		return PTR_ERR(lrc->bo);
+
+	lrc->tile = gt_to_tile(hwe->gt);
+	lrc->ring.size = ring_size;
+	lrc->ring.tail = 0;
+
+	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
+			     hwe->fence_irq, hwe->name);
+
+	if (!gt->default_lrc[hwe->class]) {
+		init_data = empty_lrc_data(hwe);
+		if (!init_data) {
+			err = -ENOMEM;
+			goto err_lrc_finish;
+		}
+	}
+
+	/*
+	 * Init Per-Process of HW status Page, LRC / context state to known
+	 * values
+	 */
+	map = __xe_lrc_pphwsp_map(lrc);
+	if (!init_data) {
+		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
+		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
+				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
+				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
+	} else {
+		xe_map_memcpy_to(xe, &map, 0, init_data,
+				 xe_lrc_size(xe, hwe->class));
+		kfree(init_data);
+	}
+
+	if (vm) {
+		xe_lrc_set_ppgtt(lrc, vm);
+
+		if (vm->xef)
+			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
+	}
+
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
+			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
+	if (xe->info.has_asid && vm)
+		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
+				     (q->usm.acc_granularity <<
+				      ACC_GRANULARITY_S) | vm->usm.asid);
+	if (xe->info.has_usm && vm)
+		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
+				     (q->usm.acc_notify << ACC_NOTIFY_S) |
+				     q->usm.acc_trigger);
+
+	lrc->desc = CTX_VALID;
+	lrc->desc |= LEGACY_64B_CONTEXT << CTX_ADDRESSING_MODE_SHIFT;
+	/* TODO: Priority */
+
+	/* While this appears to have something about privileged batches or
+	 * some such, it really just means PPGTT mode.
+	 */
+	if (vm)
+		lrc->desc |= CTX_PRIVILEGE;
+
+	if (GRAPHICS_VERx100(xe) < 1250) {
+		lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
+		lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
+	}
+
+	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
+
+	map = __xe_lrc_seqno_map(lrc);
+	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
+
+	map = __xe_lrc_start_seqno_map(lrc);
+	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
+
+	return 0;
+
+err_lrc_finish:
+	xe_lrc_finish(lrc);
+	return err;
+}
+
+void xe_lrc_finish(struct xe_lrc *lrc)
+{
+	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
+	xe_bo_lock(lrc->bo, false);
+	xe_bo_unpin(lrc->bo);
+	xe_bo_unlock(lrc->bo);
+	xe_bo_put(lrc->bo);
+}
+
+void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
+{
+	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
+}
+
+u32 xe_lrc_ring_head(struct xe_lrc *lrc)
+{
+	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
+}
+
+u32 xe_lrc_ring_space(struct xe_lrc *lrc)
+{
+	const u32 head = xe_lrc_ring_head(lrc);
+	const u32 tail = lrc->ring.tail;
+	const u32 size = lrc->ring.size;
+
+	return ((head - tail - 1) & (size - 1)) + 1;
+}
+
+static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
+				const void *data, size_t size)
+{
+	struct xe_device *xe = lrc_to_xe(lrc);
+
+	iosys_map_incr(&ring, lrc->ring.tail);
+	xe_map_memcpy_to(xe, &ring, 0, data, size);
+	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
+}
+
+void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
+{
+	struct xe_device *xe = lrc_to_xe(lrc);
+	struct iosys_map ring;
+	u32 rhs;
+	size_t aligned_size;
+
+	xe_assert(xe, IS_ALIGNED(size, 4));
+	aligned_size = ALIGN(size, 8);
+
+	ring = __xe_lrc_ring_map(lrc);
+
+	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
+	rhs = lrc->ring.size - lrc->ring.tail;
+	if (size > rhs) {
+		__xe_lrc_write_ring(lrc, ring, data, rhs);
+		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
+	} else {
+		__xe_lrc_write_ring(lrc, ring, data, size);
+	}
+
+	if (aligned_size > size) {
+		u32 noop = MI_NOOP;
+
+		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
+	}
+}
+
+u64 xe_lrc_descriptor(struct xe_lrc *lrc)
+{
+	return lrc->desc | xe_lrc_ggtt_addr(lrc);
+}
+
+u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_seqno_ggtt_addr(lrc);
+}
+
+struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
+{
+	return &xe_hw_fence_create(&lrc->fence_ctx,
+				   __xe_lrc_seqno_map(lrc))->dma;
+}
+
+s32 xe_lrc_seqno(struct xe_lrc *lrc)
+{
+	struct iosys_map map = __xe_lrc_seqno_map(lrc);
+
+	return xe_map_read32(lrc_to_xe(lrc), &map);
+}
+
+s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
+{
+	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);
+
+	return xe_map_read32(lrc_to_xe(lrc), &map);
+}
+
+u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_start_seqno_ggtt_addr(lrc);
+}
+
+u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
+{
+	return __xe_lrc_parallel_ggtt_addr(lrc);
+}
+
+struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
+{
+	return __xe_lrc_parallel_map(lrc);
+}
+
+static int instr_dw(u32 cmd_header)
+{
+	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
+	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
+	    GFXPIPE_SINGLE_DW_CMD(0, 0))
+		return 1;
+
+	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
+	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
+		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;
+
+	/* Most instructions have the # of dwords (minus 2) in 7:0 */
+	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
+}
+
+static int dump_mi_command(struct drm_printer *p,
+			   struct xe_gt *gt,
+			   u32 *dw,
+			   int remaining_dw)
+{
+	u32 inst_header = *dw;
+	u32 numdw = instr_dw(inst_header);
+	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
+	int num_noop;
+
+	/* First check for commands that don't have/use a '# DW' field */
+	switch (inst_header & MI_OPCODE) {
+	case MI_NOOP:
+		num_noop = 1;
+		while (num_noop < remaining_dw &&
+		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
+			num_noop++;
+		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
+		return num_noop;
+
+	case MI_TOPOLOGY_FILTER:
+		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
+		return 1;
+
+	case MI_BATCH_BUFFER_END:
+		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
+		/* Return 'remaining_dw' to consume the rest of the LRC */
+		return remaining_dw;
+	}
+
+	/*
+	 * Any remaining commands include a # of dwords.  We should make sure
+	 * it doesn't exceed the remaining size of the LRC.
+	 */
+	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
+		numdw = remaining_dw;
+
+	switch (inst_header & MI_OPCODE) {
+	case MI_LOAD_REGISTER_IMM:
+		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
+			   inst_header, (numdw - 1) / 2);
+		for (int i = 1; i < numdw; i += 2)
+			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
+		return numdw;
+
+	case MI_FORCE_WAKEUP:
+		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
+		return numdw;
+
+	default:
+		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
+			   inst_header, opcode, numdw);
+		return numdw;
+	}
+}
+
+static int dump_gfxpipe_command(struct drm_printer *p,
+				struct xe_gt *gt,
+				u32 *dw,
+				int remaining_dw)
+{
+	u32 numdw = instr_dw(*dw);
+	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
+	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
+	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);
+
+	/*
+	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
+	 * remaining size of the LRC.
+	 */
+	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
+		numdw = remaining_dw;
+
+	switch (*dw & GFXPIPE_MATCH_MASK) {
+#define MATCH(cmd) \
+	case cmd: \
+		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
+		return numdw
+#define MATCH3D(cmd) \
+	case CMD_##cmd: \
+		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
+		return numdw
+
+	MATCH(STATE_BASE_ADDRESS);
+	MATCH(STATE_SIP);
+	MATCH(GPGPU_CSR_BASE_ADDRESS);
+	MATCH(STATE_COMPUTE_MODE);
+	MATCH3D(3DSTATE_BTD);
+
+	MATCH3D(3DSTATE_VF_STATISTICS);
+
+	MATCH(PIPELINE_SELECT);
+
+	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
+	MATCH3D(3DSTATE_CLEAR_PARAMS);
+	MATCH3D(3DSTATE_DEPTH_BUFFER);
+	MATCH3D(3DSTATE_STENCIL_BUFFER);
+	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
+	MATCH3D(3DSTATE_VERTEX_BUFFERS);
+	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
+	MATCH3D(3DSTATE_INDEX_BUFFER);
+	MATCH3D(3DSTATE_VF);
+	MATCH3D(3DSTATE_MULTISAMPLE);
+	MATCH3D(3DSTATE_CC_STATE_POINTERS);
+	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
+	MATCH3D(3DSTATE_VS);
+	MATCH3D(3DSTATE_GS);
+	MATCH3D(3DSTATE_CLIP);
+	MATCH3D(3DSTATE_SF);
+	MATCH3D(3DSTATE_WM);
+	MATCH3D(3DSTATE_CONSTANT_VS);
+	MATCH3D(3DSTATE_CONSTANT_GS);
+	MATCH3D(3DSTATE_SAMPLE_MASK);
+	MATCH3D(3DSTATE_CONSTANT_HS);
+	MATCH3D(3DSTATE_CONSTANT_DS);
+	MATCH3D(3DSTATE_HS);
+	MATCH3D(3DSTATE_TE);
+	MATCH3D(3DSTATE_DS);
+	MATCH3D(3DSTATE_STREAMOUT);
+	MATCH3D(3DSTATE_SBE);
+	MATCH3D(3DSTATE_PS);
+	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
+	MATCH3D(3DSTATE_CPS_POINTERS);
+	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
+	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
+	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
+	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
+	MATCH3D(3DSTATE_VF_INSTANCING);
+	MATCH3D(3DSTATE_VF_SGVS);
+	MATCH3D(3DSTATE_VF_TOPOLOGY);
+	MATCH3D(3DSTATE_WM_CHROMAKEY);
+	MATCH3D(3DSTATE_PS_BLEND);
+	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
+	MATCH3D(3DSTATE_PS_EXTRA);
+	MATCH3D(3DSTATE_RASTER);
+	MATCH3D(3DSTATE_SBE_SWIZ);
+	MATCH3D(3DSTATE_WM_HZ_OP);
+	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
+	MATCH3D(3DSTATE_VF_SGVS_2);
+	MATCH3D(3DSTATE_VFG);
+	MATCH3D(3DSTATE_URB_ALLOC_VS);
+	MATCH3D(3DSTATE_URB_ALLOC_HS);
+	MATCH3D(3DSTATE_URB_ALLOC_DS);
+	MATCH3D(3DSTATE_URB_ALLOC_GS);
+	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
+	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
+	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
+	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
+	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
+	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
+	MATCH3D(3DSTATE_AMFS);
+	MATCH3D(3DSTATE_DEPTH_BOUNDS);
+	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
+	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
+	MATCH3D(3DSTATE_MESH_CONTROL);
+	MATCH3D(3DSTATE_MESH_DISTRIB);
+	MATCH3D(3DSTATE_TASK_REDISTRIB);
+	MATCH3D(3DSTATE_MESH_SHADER);
+	MATCH3D(3DSTATE_MESH_SHADER_DATA);
+	MATCH3D(3DSTATE_TASK_CONTROL);
+	MATCH3D(3DSTATE_TASK_SHADER);
+	MATCH3D(3DSTATE_TASK_SHADER_DATA);
+	MATCH3D(3DSTATE_URB_ALLOC_MESH);
+	MATCH3D(3DSTATE_URB_ALLOC_TASK);
+	MATCH3D(3DSTATE_CLIP_MESH);
+	MATCH3D(3DSTATE_SBE_MESH);
+	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
+
+	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
+	MATCH3D(3DSTATE_CHROMA_KEY);
+	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
+	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
+	MATCH3D(3DSTATE_LINE_STIPPLE);
+	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
+	MATCH3D(3DSTATE_MONOFILTER_SIZE);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
+	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
+	MATCH3D(3DSTATE_SO_DECL_LIST);
+	MATCH3D(3DSTATE_SO_BUFFER);
+	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
+	MATCH3D(3DSTATE_SAMPLE_PATTERN);
+	MATCH3D(3DSTATE_3D_MODE);
+	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
+	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
+	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);
+
+	default:
+		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
+			   *dw, pipeline, opcode, subopcode, numdw);
+		return numdw;
+	}
+}
+
+void xe_lrc_dump_default(struct drm_printer *p,
+			 struct xe_gt *gt,
+			 enum xe_engine_class hwe_class)
+{
+	u32 *dw;
+	int remaining_dw, num_dw;
+
+	if (!gt->default_lrc[hwe_class]) {
+		drm_printf(p, "No default LRC for class %d\n", hwe_class);
+		return;
+	}
+
+	/*
+	 * Skip the beginning of the LRC since it contains the per-process
+	 * hardware status page.
+	 */
+	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
+	remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;
+
+	while (remaining_dw > 0) {
+		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
+			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
+		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
+			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
+		} else {
+			num_dw = min(instr_dw(*dw), remaining_dw);
+			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
+				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
+				   num_dw);
+		}
+
+		dw += num_dw;
+		remaining_dw -= num_dw;
+	}
+}
+
+struct instr_state {
+	u32 instr;
+	u16 num_dw;
+};
+
+static const struct instr_state xe_hpg_svg_state[] = {
+	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
+	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
+	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
+	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
+	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
+	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
+	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
+	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
+	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
+	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
+	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
+	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
+	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
+	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
+	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
+	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
+	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
+	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
+	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
+};
+
+void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
+{
+	struct xe_gt *gt = q->hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	const struct instr_state *state_table = NULL;
+	int state_table_size = 0;
+
+	/*
+	 * At the moment we only need to emit non-register state for the RCS
+	 * engine.
+	 */
+	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
+		return;
+
+	switch (GRAPHICS_VERx100(xe)) {
+	case 1255:
+	case 1270 ... 2004:
+		state_table = xe_hpg_svg_state;
+		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
+		break;
+	default:
+		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
+			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
+		return;
+	}
+
+	for (int i = 0; i < state_table_size; i++) {
+		u32 instr = state_table[i].instr;
+		u16 num_dw = state_table[i].num_dw;
+		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
+
+		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
+		xe_gt_assert(gt, num_dw != 0);
+		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
+
+		/*
+		 * Xe2's SVG context is the same as the one on DG2 / MTL
+		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
+		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
+		 * Just make the replacement here rather than defining a
+		 * whole separate table for the single trivial change.
+		 */
+		if (GRAPHICS_VER(xe) >= 20 &&
+		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
+			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
+
+		bb->cs[bb->len] = instr;
+		if (!is_single_dw)
+			bb->cs[bb->len] |= (num_dw - 2);
+
+		bb->len += num_dw;
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
new file mode 100644
index 000000000000..28b1d3f404d4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+#ifndef _XE_LRC_H_
+#define _XE_LRC_H_
+
+#include "xe_lrc_types.h"
+
+struct drm_printer;
+struct xe_bb;
+struct xe_device;
+struct xe_exec_queue;
+enum xe_engine_class;
+struct xe_hw_engine;
+struct xe_vm;
+
+#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
+
+int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size);
+void xe_lrc_finish(struct xe_lrc *lrc);
+
+size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class);
+u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);
+
+void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head);
+u32 xe_lrc_ring_head(struct xe_lrc *lrc);
+u32 xe_lrc_ring_space(struct xe_lrc *lrc);
+void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size);
+
+u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
+u32 *xe_lrc_regs(struct xe_lrc *lrc);
+
+u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr);
+void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val);
+
+u64 xe_lrc_descriptor(struct xe_lrc *lrc);
+
+u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc);
+struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc);
+s32 xe_lrc_seqno(struct xe_lrc *lrc);
+
+u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc);
+s32 xe_lrc_start_seqno(struct xe_lrc *lrc);
+
+u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc);
+struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc);
+
+size_t xe_lrc_skip_size(struct xe_device *xe);
+
+void xe_lrc_dump_default(struct drm_printer *p,
+			 struct xe_gt *gt,
+			 enum xe_engine_class);
+
+void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
new file mode 100644
index 000000000000..78220336062c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_LRC_TYPES_H_
+#define _XE_LRC_TYPES_H_
+
+#include "xe_hw_fence_types.h"
+
+struct xe_bo;
+
+/**
+ * struct xe_lrc - Logical ring context (LRC) and submission ring object
+ */
+struct xe_lrc {
+	/**
+	 * @bo: buffer object (memory) for logical ring context, per process HW
+	 * status page, and submission ring.
+	 */
+	struct xe_bo *bo;
+
+	/** @tile: tile which this LRC belongs to */
+	struct xe_tile *tile;
+
+	/** @flags: LRC flags */
+	u32 flags;
+
+	/** @ring: submission ring state */
+	struct {
+		/** @size: size of submission ring */
+		u32 size;
+		/** @tail: tail of submission ring */
+		u32 tail;
+		/** @old_tail: shadow of tail */
+		u32 old_tail;
+	} ring;
+
+	/** @desc: LRC descriptor */
+	u64 desc;
+
+	/** @fence_ctx: context for hw fence */
+	struct xe_hw_fence_ctx fence_ctx;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h
new file mode 100644
index 000000000000..daf56c846d03
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_macros.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_MACROS_H_
+#define _XE_MACROS_H_
+
+#include <linux/bug.h>
+
+#define XE_WARN_ON WARN_ON
+
+#define XE_IOCTL_DBG(xe, cond) \
+	((cond) && (drm_dbg(&(xe)->drm, \
+			    "Ioctl argument check failed at %s:%d: %s", \
+			    __FILE__, __LINE__, #cond), 1))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h
new file mode 100644
index 000000000000..f62e0c8b67ab
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_map.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MAP_H_
+#define _XE_MAP_H_
+
+#include <linux/iosys-map.h>
+
+#include <xe_device.h>
+
+/**
+ * DOC: Map layer
+ *
+ * All access to any memory shared with a device (both sysmem and vram) in the
+ * XE driver should go through this layer (xe_map). This layer is built on top
+ * of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory`
+ * and with extra hooks into the XE driver that allows adding asserts to memory
+ * accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics).
+ */
+
+static inline void xe_map_memcpy_to(struct xe_device *xe, struct iosys_map *dst,
+				    size_t dst_offset, const void *src,
+				    size_t len)
+{
+	xe_device_assert_mem_access(xe);
+	iosys_map_memcpy_to(dst, dst_offset, src, len);
+}
+
+static inline void xe_map_memcpy_from(struct xe_device *xe, void *dst,
+				      const struct iosys_map *src,
+				      size_t src_offset, size_t len)
+{
+	xe_device_assert_mem_access(xe);
+	iosys_map_memcpy_from(dst, src, src_offset, len);
+}
+
+static inline void xe_map_memset(struct xe_device *xe,
+				 struct iosys_map *dst, size_t offset,
+				 int value, size_t len)
+{
+	xe_device_assert_mem_access(xe);
+	iosys_map_memset(dst, offset, value, len);
+}
+
+/* FIXME: We likely should kill these two functions sooner or later */
+static inline u32 xe_map_read32(struct xe_device *xe, struct iosys_map *map)
+{
+	xe_device_assert_mem_access(xe);
+
+	if (map->is_iomem)
+		return readl(map->vaddr_iomem);
+	else
+		return READ_ONCE(*(u32 *)map->vaddr);
+}
+
+static inline void xe_map_write32(struct xe_device *xe, struct iosys_map *map,
+				  u32 val)
+{
+	xe_device_assert_mem_access(xe);
+
+	if (map->is_iomem)
+		writel(val, map->vaddr_iomem);
+	else
+		*(u32 *)map->vaddr = val;
+}
+
+#define xe_map_rd(xe__, map__, offset__, type__) ({			\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_rd(map__, offset__, type__);				\
+})
+
+#define xe_map_wr(xe__, map__, offset__, type__, val__) ({		\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_wr(map__, offset__, type__, val__);			\
+})
+
+#define xe_map_rd_field(xe__, map__, struct_offset__, struct_type__, field__) ({	\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_rd_field(map__, struct_offset__, struct_type__, field__);		\
+})
+
+#define xe_map_wr_field(xe__, map__, struct_offset__, struct_type__, field__, val__) ({	\
+	struct xe_device *__xe = xe__;					\
+	xe_device_assert_mem_access(__xe);				\
+	iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__);	\
+})
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
new file mode 100644
index 000000000000..adf1dab5eba2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -0,0 +1,1410 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "xe_migrate.h"
+
+#include <linux/bitfield.h>
+#include <linux/sizes.h>
+
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+
+#include "generated/xe_wa_oob.h"
+#include "instructions/xe_mi_commands.h"
+#include "regs/xe_gpu_commands.h"
+#include "tests/xe_test.h"
+#include "xe_assert.h"
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_exec_queue.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_mocs.h"
+#include "xe_pt.h"
+#include "xe_res_cursor.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+#include "xe_wa.h"
+
+/**
+ * struct xe_migrate - migrate context.
+ */
+struct xe_migrate {
+	/** @q: Default exec queue used for migration */
+	struct xe_exec_queue *q;
+	/** @tile: Backpointer to the tile this struct xe_migrate belongs to. */
+	struct xe_tile *tile;
+	/** @job_mutex: Timeline mutex for @eng. */
+	struct mutex job_mutex;
+	/** @pt_bo: Page-table buffer object. */
+	struct xe_bo *pt_bo;
+	/** @batch_base_ofs: VM offset of the migration batch buffer */
+	u64 batch_base_ofs;
+	/** @usm_batch_base_ofs: VM offset of the usm batch buffer */
+	u64 usm_batch_base_ofs;
+	/** @cleared_mem_ofs: VM offset of @cleared_bo. */
+	u64 cleared_mem_ofs;
+	/**
+	 * @fence: dma-fence representing the last migration job batch.
+	 * Protected by @job_mutex.
+	 */
+	struct dma_fence *fence;
+	/**
+	 * @vm_update_sa: For integrated, used to suballocate page-tables
+	 * out of the pt_bo.
+	 */
+	struct drm_suballoc_manager vm_update_sa;
+};
+
+#define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */
+#define MAX_CCS_LIMITED_TRANSFER SZ_4M /* XE_PAGE_SIZE * (FIELD_MAX(XE2_CCS_SIZE_MASK) + 1) */
+#define NUM_KERNEL_PDE 17
+#define NUM_PT_SLOTS 32
+#define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M
+
+/**
+ * xe_tile_migrate_engine() - Get this tile's migrate engine.
+ * @tile: The tile.
+ *
+ * Returns the default migrate engine of this tile.
+ * TODO: Perhaps this function is slightly misplaced, and even unneeded?
+ *
+ * Return: The default migrate engine
+ */
+struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile)
+{
+	return tile->migrate->q;
+}
+
+static void xe_migrate_fini(struct drm_device *dev, void *arg)
+{
+	struct xe_migrate *m = arg;
+
+	xe_vm_lock(m->q->vm, false);
+	xe_bo_unpin(m->pt_bo);
+	xe_vm_unlock(m->q->vm);
+
+	dma_fence_put(m->fence);
+	xe_bo_put(m->pt_bo);
+	drm_suballoc_manager_fini(&m->vm_update_sa);
+	mutex_destroy(&m->job_mutex);
+	xe_vm_close_and_put(m->q->vm);
+	xe_exec_queue_put(m->q);
+}
+
+static u64 xe_migrate_vm_addr(u64 slot, u32 level)
+{
+	XE_WARN_ON(slot >= NUM_PT_SLOTS);
+
+	/* First slot is reserved for mapping of PT bo and bb, start from 1 */
+	return (slot + 1ULL) << xe_pt_shift(level + 1);
+}
+
+static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr)
+{
+	/*
+	 * Remove the DPA to get a correct offset into identity table for the
+	 * migrate offset
+	 */
+	addr -= xe->mem.vram.dpa_base;
+	return addr + (256ULL << xe_pt_shift(2));
+}
+
+static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
+				 struct xe_vm *vm)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
+	u8 id = tile->id;
+	u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
+	u32 map_ofs, level, i;
+	struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo;
+	u64 entry;
+
+	/* Can't bump NUM_PT_SLOTS too high */
+	BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE);
+	/* Must be a multiple of 64K to support all platforms */
+	BUILD_BUG_ON(NUM_PT_SLOTS * XE_PAGE_SIZE % SZ_64K);
+	/* And one slot reserved for the 4KiB page table updates */
+	BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1));
+
+	/* Need to be sure everything fits in the first PT, or create more */
+	xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M);
+
+	bo = xe_bo_create_pin_map(vm->xe, tile, vm,
+				  num_entries * XE_PAGE_SIZE,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, pat_index);
+	xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
+
+	map_ofs = (num_entries - num_level) * XE_PAGE_SIZE;
+
+	/* Map the entire BO in our level 0 pt */
+	for (i = 0, level = 0; i < num_entries; level++) {
+		entry = vm->pt_ops->pte_encode_bo(bo, i * XE_PAGE_SIZE,
+						  pat_index, 0);
+
+		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
+
+		if (vm->flags & XE_VM_FLAG_64K)
+			i += 16;
+		else
+			i += 1;
+	}
+
+	if (!IS_DGFX(xe)) {
+		/* Write out batch too */
+		m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE;
+		if (xe->info.has_usm) {
+			batch = tile->primary_gt->usm.bb_pool->bo;
+			m->usm_batch_base_ofs = m->batch_base_ofs;
+		}
+
+		for (i = 0; i < batch->size;
+		     i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
+		     XE_PAGE_SIZE) {
+			entry = vm->pt_ops->pte_encode_bo(batch, i,
+							  pat_index, 0);
+
+			xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
+				  entry);
+			level++;
+		}
+	} else {
+		u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
+
+		m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);
+
+		if (xe->info.has_usm) {
+			batch = tile->primary_gt->usm.bb_pool->bo;
+			batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
+			m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr);
+		}
+	}
+
+	for (level = 1; level < num_level; level++) {
+		u32 flags = 0;
+
+		if (vm->flags & XE_VM_FLAG_64K && level == 1)
+			flags = XE_PDE_64K;
+
+		entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) *
+						  XE_PAGE_SIZE, pat_index);
+		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
+			  entry | flags);
+	}
+
+	/* Write PDE's that point to our BO. */
+	for (i = 0; i < num_entries - num_level; i++) {
+		entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE,
+						  pat_index);
+
+		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
+			  (i + 1) * 8, u64, entry);
+	}
+
+	/* Set up a 1GiB NULL mapping at 255GiB offset. */
+	level = 2;
+	xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64,
+		  vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0)
+		  | XE_PTE_NULL);
+	m->cleared_mem_ofs = (255ULL << xe_pt_shift(level));
+
+	/* Identity map the entire vram at 256GiB offset */
+	if (IS_DGFX(xe)) {
+		u64 pos, ofs, flags;
+
+		level = 2;
+		ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8;
+		flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level,
+						    true, 0);
+
+		/*
+		 * Use 1GB pages, it shouldn't matter the physical amount of
+		 * vram is less, when we don't access it.
+		 */
+		for (pos = xe->mem.vram.dpa_base;
+		     pos < xe->mem.vram.actual_physical_size + xe->mem.vram.dpa_base;
+		     pos += SZ_1G, ofs += 8)
+			xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+	}
+
+	/*
+	 * Example layout created above, with root level = 3:
+	 * [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's
+	 * [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's
+	 * [PT9...PT28]: Userspace PT's for VM_BIND, 4 KiB PTE's
+	 * [PT29 = PDE 0] [PT30 = PDE 1] [PT31 = PDE 2]
+	 *
+	 * This makes the lowest part of the VM point to the pagetables.
+	 * Hence the lowest 2M in the vm should point to itself, with a few writes
+	 * and flushes, other parts of the VM can be used either for copying and
+	 * clearing.
+	 *
+	 * For performance, the kernel reserves PDE's, so about 20 are left
+	 * for async VM updates.
+	 *
+	 * To make it easier to work, each scratch PT is put in slot (1 + PT #)
+	 * everywhere, this allows lockless updates to scratch pages by using
+	 * the different addresses in VM.
+	 */
+#define NUM_VMUSA_UNIT_PER_PAGE	32
+#define VM_SA_UPDATE_UNIT_SIZE		(XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
+#define NUM_VMUSA_WRITES_PER_UNIT	(VM_SA_UPDATE_UNIT_SIZE / sizeof(u64))
+	drm_suballoc_manager_init(&m->vm_update_sa,
+				  (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
+				  NUM_VMUSA_UNIT_PER_PAGE, 0);
+
+	m->pt_bo = bo;
+	return 0;
+}
+
+/*
+ * Due to workaround 16017236439, odd instance hardware copy engines are
+ * faster than even instance ones.
+ * This function returns the mask involving all fast copy engines and the
+ * reserved copy engine to be used as logical mask for migrate engine.
+ * Including the reserved copy engine is required to avoid deadlocks due to
+ * migrate jobs servicing the faults gets stuck behind the job that faulted.
+ */
+static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
+{
+	u32 logical_mask = 0;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+
+	for_each_hw_engine(hwe, gt, id) {
+		if (hwe->class != XE_ENGINE_CLASS_COPY)
+			continue;
+
+		if (!XE_WA(gt, 16017236439) ||
+		    xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
+			logical_mask |= BIT(hwe->logical_instance);
+	}
+
+	return logical_mask;
+}
+
+/**
+ * xe_migrate_init() - Initialize a migrate context
+ * @tile: Back-pointer to the tile we're initializing for.
+ *
+ * Return: Pointer to a migrate context on success. Error pointer on error.
+ */
+struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_gt *primary_gt = tile->primary_gt;
+	struct xe_migrate *m;
+	struct xe_vm *vm;
+	int err;
+
+	m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL);
+	if (!m)
+		return ERR_PTR(-ENOMEM);
+
+	m->tile = tile;
+
+	/* Special layout, prepared below.. */
+	vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION |
+			  XE_VM_FLAG_SET_TILE_ID(tile));
+	if (IS_ERR(vm))
+		return ERR_CAST(vm);
+
+	xe_vm_lock(vm, false);
+	err = xe_migrate_prepare_vm(tile, m, vm);
+	xe_vm_unlock(vm);
+	if (err) {
+		xe_vm_close_and_put(vm);
+		return ERR_PTR(err);
+	}
+
+	if (xe->info.has_usm) {
+		struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
+							   XE_ENGINE_CLASS_COPY,
+							   primary_gt->usm.reserved_bcs_instance,
+							   false);
+		u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt);
+
+		if (!hwe || !logical_mask)
+			return ERR_PTR(-EINVAL);
+
+		m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
+					    EXEC_QUEUE_FLAG_KERNEL |
+					    EXEC_QUEUE_FLAG_PERMANENT);
+	} else {
+		m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
+						  XE_ENGINE_CLASS_COPY,
+						  EXEC_QUEUE_FLAG_KERNEL |
+						  EXEC_QUEUE_FLAG_PERMANENT);
+	}
+	if (IS_ERR(m->q)) {
+		xe_vm_close_and_put(vm);
+		return ERR_CAST(m->q);
+	}
+	if (xe->info.has_usm)
+		m->q->priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
+
+	mutex_init(&m->job_mutex);
+
+	err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m);
+	if (err)
+		return ERR_PTR(err);
+
+	return m;
+}
+
+static u64 max_mem_transfer_per_pass(struct xe_device *xe)
+{
+	if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe))
+		return MAX_CCS_LIMITED_TRANSFER;
+
+	return MAX_PREEMPTDISABLE_TRANSFER;
+}
+
+static u64 xe_migrate_res_sizes(struct xe_device *xe, struct xe_res_cursor *cur)
+{
+	/*
+	 * For VRAM we use identity mapped pages so we are limited to current
+	 * cursor size. For system we program the pages ourselves so we have no
+	 * such limitation.
+	 */
+	return min_t(u64, max_mem_transfer_per_pass(xe),
+		     mem_type_is_vram(cur->mem_type) ? cur->size :
+		     cur->remaining);
+}
+
+static u32 pte_update_size(struct xe_migrate *m,
+			   bool is_vram,
+			   struct ttm_resource *res,
+			   struct xe_res_cursor *cur,
+			   u64 *L0, u64 *L0_ofs, u32 *L0_pt,
+			   u32 cmd_size, u32 pt_ofs, u32 avail_pts)
+{
+	u32 cmds = 0;
+
+	*L0_pt = pt_ofs;
+	if (!is_vram) {
+		/* Clip L0 to available size */
+		u64 size = min(*L0, (u64)avail_pts * SZ_2M);
+		u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+
+		*L0 = size;
+		*L0_ofs = xe_migrate_vm_addr(pt_ofs, 0);
+
+		/* MI_STORE_DATA_IMM */
+		cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff);
+
+		/* PDE qwords */
+		cmds += num_4k_pages * 2;
+
+		/* Each chunk has a single blit command */
+		cmds += cmd_size;
+	} else {
+		/* Offset into identity map. */
+		*L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile),
+					      cur->start + vram_region_gpu_offset(res));
+		cmds += cmd_size;
+	}
+
+	return cmds;
+}
+
+static void emit_pte(struct xe_migrate *m,
+		     struct xe_bb *bb, u32 at_pt,
+		     bool is_vram, bool is_comp_pte,
+		     struct xe_res_cursor *cur,
+		     u32 size, struct xe_bo *bo)
+{
+	struct xe_device *xe = tile_to_xe(m->tile);
+
+	u16 pat_index;
+	u32 ptes;
+	u64 ofs = at_pt * XE_PAGE_SIZE;
+	u64 cur_ofs;
+
+	/* Indirect access needs compression enabled uncached PAT index */
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		pat_index = is_comp_pte ? xe->pat.idx[XE_CACHE_NONE_COMPRESSION] :
+					  xe->pat.idx[XE_CACHE_NONE];
+	else
+		pat_index = xe->pat.idx[XE_CACHE_WB];
+
+	/*
+	 * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently
+	 * we're only emitting VRAM PTEs during sanity tests, so when
+	 * that's moved to a Kunit test, we should condition VRAM PTEs
+	 * on running tests.
+	 */
+
+	ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+
+	while (ptes) {
+		u32 chunk = min(0x1ffU, ptes);
+
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+		bb->cs[bb->len++] = ofs;
+		bb->cs[bb->len++] = 0;
+
+		cur_ofs = ofs;
+		ofs += chunk * 8;
+		ptes -= chunk;
+
+		while (chunk--) {
+			u64 addr, flags = 0;
+			bool devmem = false;
+
+			addr = xe_res_dma(cur) & PAGE_MASK;
+			if (is_vram) {
+				/* Is this a 64K PTE entry? */
+				if ((m->q->vm->flags & XE_VM_FLAG_64K) &&
+				    !(cur_ofs & (16 * 8 - 1))) {
+					xe_tile_assert(m->tile, IS_ALIGNED(addr, SZ_64K));
+					flags |= XE_PTE_PS64;
+				}
+
+				addr += vram_region_gpu_offset(bo->ttm.resource);
+				devmem = true;
+			}
+
+			addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
+								 addr, pat_index,
+								 0, devmem, flags);
+			bb->cs[bb->len++] = lower_32_bits(addr);
+			bb->cs[bb->len++] = upper_32_bits(addr);
+
+			xe_res_next(cur, min_t(u32, size, PAGE_SIZE));
+			cur_ofs += 8;
+		}
+	}
+}
+
+#define EMIT_COPY_CCS_DW 5
+static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
+			  u64 dst_ofs, bool dst_is_indirect,
+			  u64 src_ofs, bool src_is_indirect,
+			  u32 size)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 *cs = bb->cs + bb->len;
+	u32 num_ccs_blks;
+	u32 num_pages;
+	u32 ccs_copy_size;
+	u32 mocs;
+
+	if (GRAPHICS_VERx100(xe) >= 2000) {
+		num_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+		xe_gt_assert(gt, FIELD_FIT(XE2_CCS_SIZE_MASK, num_pages - 1));
+
+		ccs_copy_size = REG_FIELD_PREP(XE2_CCS_SIZE_MASK, num_pages - 1);
+		mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index);
+
+	} else {
+		num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
+					    NUM_CCS_BYTES_PER_BLOCK);
+		xe_gt_assert(gt, FIELD_FIT(CCS_SIZE_MASK, num_ccs_blks - 1));
+
+		ccs_copy_size = REG_FIELD_PREP(CCS_SIZE_MASK, num_ccs_blks - 1);
+		mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index);
+	}
+
+	*cs++ = XY_CTRL_SURF_COPY_BLT |
+		(src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
+		(dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
+		ccs_copy_size;
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs) | mocs;
+	*cs++ = lower_32_bits(dst_ofs);
+	*cs++ = upper_32_bits(dst_ofs) | mocs;
+
+	bb->len = cs - bb->cs;
+}
+
+#define EMIT_COPY_DW 10
+static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
+		      u64 src_ofs, u64 dst_ofs, unsigned int size,
+		      unsigned int pitch)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 mocs = 0;
+	u32 tile_y = 0;
+
+	xe_gt_assert(gt, size / pitch <= S16_MAX);
+	xe_gt_assert(gt, pitch / 4 <= S16_MAX);
+	xe_gt_assert(gt, pitch <= U16_MAX);
+
+	if (GRAPHICS_VER(xe) >= 20)
+		mocs = FIELD_PREP(XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index);
+
+	if (GRAPHICS_VERx100(xe) >= 1250)
+		tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4;
+
+	bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
+	bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs;
+	bb->cs[bb->len++] = 0;
+	bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
+	bb->cs[bb->len++] = lower_32_bits(dst_ofs);
+	bb->cs[bb->len++] = upper_32_bits(dst_ofs);
+	bb->cs[bb->len++] = 0;
+	bb->cs[bb->len++] = pitch | mocs;
+	bb->cs[bb->len++] = lower_32_bits(src_ofs);
+	bb->cs[bb->len++] = upper_32_bits(src_ofs);
+}
+
+static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv,
+			enum dma_resv_usage usage)
+{
+	return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage);
+}
+
+static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
+{
+	return usm ? m->usm_batch_base_ofs : m->batch_base_ofs;
+}
+
+static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
+			       struct xe_bb *bb,
+			       u64 src_ofs, bool src_is_indirect,
+			       u64 dst_ofs, bool dst_is_indirect, u32 dst_size,
+			       u64 ccs_ofs, bool copy_ccs)
+{
+	struct xe_gt *gt = m->tile->primary_gt;
+	u32 flush_flags = 0;
+
+	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
+		/*
+		 * If the src is already in vram, then it should already
+		 * have been cleared by us, or has been populated by the
+		 * user. Make sure we copy the CCS aux state as-is.
+		 *
+		 * Otherwise if the bo doesn't have any CCS metadata attached,
+		 * we still need to clear it for security reasons.
+		 */
+		u64 ccs_src_ofs =  src_is_indirect ? src_ofs : m->cleared_mem_ofs;
+
+		emit_copy_ccs(gt, bb,
+			      dst_ofs, true,
+			      ccs_src_ofs, src_is_indirect, dst_size);
+
+		flush_flags = MI_FLUSH_DW_CCS;
+	} else if (copy_ccs) {
+		if (!src_is_indirect)
+			src_ofs = ccs_ofs;
+		else if (!dst_is_indirect)
+			dst_ofs = ccs_ofs;
+
+		xe_gt_assert(gt, src_is_indirect || dst_is_indirect);
+
+		emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect, src_ofs,
+			      src_is_indirect, dst_size);
+		if (dst_is_indirect)
+			flush_flags = MI_FLUSH_DW_CCS;
+	}
+
+	return flush_flags;
+}
+
+/**
+ * xe_migrate_copy() - Copy content of TTM resources.
+ * @m: The migration context.
+ * @src_bo: The buffer object @src is currently bound to.
+ * @dst_bo: If copying between resources created for the same bo, set this to
+ * the same value as @src_bo. If copying between buffer objects, set it to
+ * the buffer object @dst is currently bound to.
+ * @src: The source TTM resource.
+ * @dst: The dst TTM resource.
+ * @copy_only_ccs: If true copy only CCS metadata
+ *
+ * Copies the contents of @src to @dst: On flat CCS devices,
+ * the CCS metadata is copied as well if needed, or if not present,
+ * the CCS metadata of @dst is cleared for security reasons.
+ *
+ * Return: Pointer to a dma_fence representing the last copy batch, or
+ * an error pointer on failure. If there is a failure, any copy operation
+ * started by the function call has been synced.
+ */
+struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
+				  struct xe_bo *src_bo,
+				  struct xe_bo *dst_bo,
+				  struct ttm_resource *src,
+				  struct ttm_resource *dst,
+				  bool copy_only_ccs)
+{
+	struct xe_gt *gt = m->tile->primary_gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct dma_fence *fence = NULL;
+	u64 size = src_bo->size;
+	struct xe_res_cursor src_it, dst_it, ccs_it;
+	u64 src_L0_ofs, dst_L0_ofs;
+	u32 src_L0_pt, dst_L0_pt;
+	u64 src_L0, dst_L0;
+	int pass = 0;
+	int err;
+	bool src_is_pltt = src->mem_type == XE_PL_TT;
+	bool dst_is_pltt = dst->mem_type == XE_PL_TT;
+	bool src_is_vram = mem_type_is_vram(src->mem_type);
+	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
+	bool copy_ccs = xe_device_has_flat_ccs(xe) &&
+		xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo);
+	bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
+
+	/* Copying CCS between two different BOs is not supported yet. */
+	if (XE_WARN_ON(copy_ccs && src_bo != dst_bo))
+		return ERR_PTR(-EINVAL);
+
+	if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size))
+		return ERR_PTR(-EINVAL);
+
+	if (!src_is_vram)
+		xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
+	else
+		xe_res_first(src, 0, size, &src_it);
+	if (!dst_is_vram)
+		xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it);
+	else
+		xe_res_first(dst, 0, size, &dst_it);
+
+	if (copy_system_ccs)
+		xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo),
+				PAGE_ALIGN(xe_device_ccs_bytes(xe, size)),
+				&ccs_it);
+
+	while (size) {
+		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 flush_flags;
+		u32 update_idx;
+		u64 ccs_ofs, ccs_size;
+		u32 ccs_pt;
+
+		bool usm = xe->info.has_usm;
+		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+		src_L0 = xe_migrate_res_sizes(xe, &src_it);
+		dst_L0 = xe_migrate_res_sizes(xe, &dst_it);
+
+		drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
+			pass++, src_L0, dst_L0);
+
+		src_L0 = min(src_L0, dst_L0);
+
+		batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0,
+					      &src_L0_ofs, &src_L0_pt, 0, 0,
+					      avail_pts);
+
+		batch_size += pte_update_size(m, dst_is_vram, dst, &dst_it, &src_L0,
+					      &dst_L0_ofs, &dst_L0_pt, 0,
+					      avail_pts, avail_pts);
+
+		if (copy_system_ccs) {
+			ccs_size = xe_device_ccs_bytes(xe, src_L0);
+			batch_size += pte_update_size(m, false, NULL, &ccs_it, &ccs_size,
+						      &ccs_ofs, &ccs_pt, 0,
+						      2 * avail_pts,
+						      avail_pts);
+		}
+
+		/* Add copy commands size here */
+		batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
+			((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0));
+
+		bb = xe_bb_new(gt, batch_size, usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		if (!src_is_vram)
+			emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0,
+				 src_bo);
+		else
+			xe_res_next(&src_it, src_L0);
+
+		if (!dst_is_vram)
+			emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0,
+				 dst_bo);
+		else
+			xe_res_next(&dst_it, src_L0);
+
+		if (copy_system_ccs)
+			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src_bo);
+
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		if (!copy_only_ccs)
+			emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
+
+		flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
+						  IS_DGFX(xe) ? src_is_vram : src_is_pltt,
+						  dst_L0_ofs,
+						  IS_DGFX(xe) ? dst_is_vram : dst_is_pltt,
+						  src_L0, ccs_ofs, copy_ccs);
+
+		mutex_lock(&m->job_mutex);
+		job = xe_bb_create_migration_job(m->q, bb,
+						 xe_migrate_batch_base(m, usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+		if (!fence) {
+			err = job_add_deps(job, src_bo->ttm.base.resv,
+					   DMA_RESV_USAGE_BOOKKEEP);
+			if (!err && src_bo != dst_bo)
+				err = job_add_deps(job, dst_bo->ttm.base.resv,
+						   DMA_RESV_USAGE_BOOKKEEP);
+			if (err)
+				goto err_job;
+		}
+
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		size -= src_L0;
+		continue;
+
+err_job:
+		xe_sched_job_put(job);
+err:
+		mutex_unlock(&m->job_mutex);
+		xe_bb_free(bb, NULL);
+
+err_sync:
+		/* Sync partial copy if any. FIXME: under job_mutex? */
+		if (fence) {
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+
+		return ERR_PTR(err);
+	}
+
+	return fence;
+}
+
+static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+				 u32 size, u32 pitch)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 *cs = bb->cs + bb->len;
+	u32 len = PVC_MEM_SET_CMD_LEN_DW;
+
+	*cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2);
+	*cs++ = pitch - 1;
+	*cs++ = (size / pitch) - 1;
+	*cs++ = pitch - 1;
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs);
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		*cs++ = FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
+	else
+		*cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
+
+	xe_gt_assert(gt, cs - bb->cs == len + bb->len);
+
+	bb->len += len;
+}
+
+static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
+				 u64 src_ofs, u32 size, u32 pitch, bool is_vram)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 *cs = bb->cs + bb->len;
+	u32 len = XY_FAST_COLOR_BLT_DW;
+
+	if (GRAPHICS_VERx100(xe) < 1250)
+		len = 11;
+
+	*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
+		(len - 2);
+	if (GRAPHICS_VERx100(xe) >= 2000)
+		*cs++ = FIELD_PREP(XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index) |
+			(pitch - 1);
+	else
+		*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, gt->mocs.uc_index) |
+			(pitch - 1);
+	*cs++ = 0;
+	*cs++ = (size / pitch) << 16 | pitch / 4;
+	*cs++ = lower_32_bits(src_ofs);
+	*cs++ = upper_32_bits(src_ofs);
+	*cs++ = (is_vram ? 0x0 : 0x1) <<  XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	if (len > 11) {
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
+	}
+
+	xe_gt_assert(gt, cs - bb->cs == len + bb->len);
+
+	bb->len += len;
+}
+
+static bool has_service_copy_support(struct xe_gt *gt)
+{
+	/*
+	 * What we care about is whether the architecture was designed with
+	 * service copy functionality (specifically the new MEM_SET / MEM_COPY
+	 * instructions) so check the architectural engine list rather than the
+	 * actual list since these instructions are usable on BCS0 even if
+	 * all of the actual service copy engines (BCS1-BCS8) have been fused
+	 * off.
+	 */
+	return gt->info.__engine_mask & GENMASK(XE_HW_ENGINE_BCS8,
+						XE_HW_ENGINE_BCS1);
+}
+
+static u32 emit_clear_cmd_len(struct xe_gt *gt)
+{
+	if (has_service_copy_support(gt))
+		return PVC_MEM_SET_CMD_LEN_DW;
+	else
+		return XY_FAST_COLOR_BLT_DW;
+}
+
+static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+		       u32 size, u32 pitch, bool is_vram)
+{
+	if (has_service_copy_support(gt))
+		emit_clear_link_copy(gt, bb, src_ofs, size, pitch);
+	else
+		emit_clear_main_copy(gt, bb, src_ofs, size, pitch,
+				     is_vram);
+}
+
+/**
+ * xe_migrate_clear() - Copy content of TTM resources.
+ * @m: The migration context.
+ * @bo: The buffer object @dst is currently bound to.
+ * @dst: The dst TTM resource to be cleared.
+ *
+ * Clear the contents of @dst to zero. On flat CCS devices,
+ * the CCS metadata is cleared to zero as well on VRAM destinations.
+ * TODO: Eliminate the @bo argument.
+ *
+ * Return: Pointer to a dma_fence representing the last clear batch, or
+ * an error pointer on failure. If there is a failure, any clear operation
+ * started by the function call has been synced.
+ */
+struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
+				   struct xe_bo *bo,
+				   struct ttm_resource *dst)
+{
+	bool clear_vram = mem_type_is_vram(dst->mem_type);
+	struct xe_gt *gt = m->tile->primary_gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false;
+	struct dma_fence *fence = NULL;
+	u64 size = bo->size;
+	struct xe_res_cursor src_it;
+	struct ttm_resource *src = dst;
+	int err;
+	int pass = 0;
+
+	if (!clear_vram)
+		xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it);
+	else
+		xe_res_first(src, 0, bo->size, &src_it);
+
+	while (size) {
+		u64 clear_L0_ofs;
+		u32 clear_L0_pt;
+		u32 flush_flags = 0;
+		u64 clear_L0;
+		struct xe_sched_job *job;
+		struct xe_bb *bb;
+		u32 batch_size, update_idx;
+
+		bool usm = xe->info.has_usm;
+		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+		clear_L0 = xe_migrate_res_sizes(xe, &src_it);
+
+		drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0);
+
+		/* Calculate final sizes and batch size.. */
+		batch_size = 2 +
+			pte_update_size(m, clear_vram, src, &src_it,
+					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
+					clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0,
+					avail_pts);
+
+		if (xe_device_has_flat_ccs(xe))
+			batch_size += EMIT_COPY_CCS_DW;
+
+		/* Clear commands */
+
+		if (WARN_ON_ONCE(!clear_L0))
+			break;
+
+		bb = xe_bb_new(gt, batch_size, usm);
+		if (IS_ERR(bb)) {
+			err = PTR_ERR(bb);
+			goto err_sync;
+		}
+
+		size -= clear_L0;
+		/* Preemption is enabled again by the ring ops. */
+		if (!clear_vram) {
+			emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0,
+				 bo);
+		} else {
+			xe_res_next(&src_it, clear_L0);
+		}
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		if (!clear_system_ccs)
+			emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram);
+
+		if (xe_device_has_flat_ccs(xe)) {
+			emit_copy_ccs(gt, bb, clear_L0_ofs, true,
+				      m->cleared_mem_ofs, false, clear_L0);
+			flush_flags = MI_FLUSH_DW_CCS;
+		}
+
+		mutex_lock(&m->job_mutex);
+		job = xe_bb_create_migration_job(m->q, bb,
+						 xe_migrate_batch_base(m, usm),
+						 update_idx);
+		if (IS_ERR(job)) {
+			err = PTR_ERR(job);
+			goto err;
+		}
+
+		xe_sched_job_add_migrate_flush(job, flush_flags);
+		if (!fence) {
+			/*
+			 * There can't be anything userspace related at this
+			 * point, so we just need to respect any potential move
+			 * fences, which are always tracked as
+			 * DMA_RESV_USAGE_KERNEL.
+			 */
+			err = job_add_deps(job, bo->ttm.base.resv,
+					   DMA_RESV_USAGE_KERNEL);
+			if (err)
+				goto err_job;
+		}
+
+		xe_sched_job_arm(job);
+		dma_fence_put(fence);
+		fence = dma_fence_get(&job->drm.s_fence->finished);
+		xe_sched_job_push(job);
+
+		dma_fence_put(m->fence);
+		m->fence = dma_fence_get(fence);
+
+		mutex_unlock(&m->job_mutex);
+
+		xe_bb_free(bb, fence);
+		continue;
+
+err_job:
+		xe_sched_job_put(job);
+err:
+		mutex_unlock(&m->job_mutex);
+		xe_bb_free(bb, NULL);
+err_sync:
+		/* Sync partial copies if any. FIXME: job_mutex? */
+		if (fence) {
+			dma_fence_wait(m->fence, false);
+			dma_fence_put(fence);
+		}
+
+		return ERR_PTR(err);
+	}
+
+	if (clear_system_ccs)
+		bo->ccs_cleared = true;
+
+	return fence;
+}
+
+static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
+			  const struct xe_vm_pgtable_update *update,
+			  struct xe_migrate_pt_update *pt_update)
+{
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	u32 chunk;
+	u32 ofs = update->ofs, size = update->qwords;
+
+	/*
+	 * If we have 512 entries (max), we would populate it ourselves,
+	 * and update the PDE above it to the new pointer.
+	 * The only time this can only happen if we have to update the top
+	 * PDE. This requires a BO that is almost vm->size big.
+	 *
+	 * This shouldn't be possible in practice.. might change when 16K
+	 * pages are used. Hence the assert.
+	 */
+	xe_tile_assert(tile, update->qwords <= 0x1ff);
+	if (!ppgtt_ofs)
+		ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile),
+						xe_bo_addr(update->pt_bo, 0,
+							   XE_PAGE_SIZE));
+
+	do {
+		u64 addr = ppgtt_ofs + ofs * 8;
+
+		chunk = min(update->qwords, 0x1ffU);
+
+		/* Ensure populatefn can do memset64 by aligning bb->cs */
+		if (!(bb->len & 1))
+			bb->cs[bb->len++] = MI_NOOP;
+
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+		bb->cs[bb->len++] = lower_32_bits(addr);
+		bb->cs[bb->len++] = upper_32_bits(addr);
+		ops->populate(pt_update, tile, NULL, bb->cs + bb->len, ofs, chunk,
+			      update);
+
+		bb->len += chunk * 2;
+		ofs += chunk;
+		size -= chunk;
+	} while (size);
+}
+
+struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m)
+{
+	return xe_vm_get(m->q->vm);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+struct migrate_test_params {
+	struct xe_test_priv base;
+	bool force_gpu;
+};
+
+#define to_migrate_test_params(_priv) \
+	container_of(_priv, struct migrate_test_params, base)
+#endif
+
+static struct dma_fence *
+xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
+			       struct xe_vm *vm, struct xe_bo *bo,
+			       const struct  xe_vm_pgtable_update *updates,
+			       u32 num_updates, bool wait_vm,
+			       struct xe_migrate_pt_update *pt_update)
+{
+	XE_TEST_DECLARE(struct migrate_test_params *test =
+			to_migrate_test_params
+			(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));)
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	struct dma_fence *fence;
+	int err;
+	u32 i;
+
+	if (XE_TEST_ONLY(test && test->force_gpu))
+		return ERR_PTR(-ETIME);
+
+	if (bo && !dma_resv_test_signaled(bo->ttm.base.resv,
+					  DMA_RESV_USAGE_KERNEL))
+		return ERR_PTR(-ETIME);
+
+	if (wait_vm && !dma_resv_test_signaled(xe_vm_resv(vm),
+					       DMA_RESV_USAGE_BOOKKEEP))
+		return ERR_PTR(-ETIME);
+
+	if (ops->pre_commit) {
+		pt_update->job = NULL;
+		err = ops->pre_commit(pt_update);
+		if (err)
+			return ERR_PTR(err);
+	}
+	for (i = 0; i < num_updates; i++) {
+		const struct xe_vm_pgtable_update *update = &updates[i];
+
+		ops->populate(pt_update, m->tile, &update->pt_bo->vmap, NULL,
+			      update->ofs, update->qwords, update);
+	}
+
+	if (vm) {
+		trace_xe_vm_cpu_bind(vm);
+		xe_device_wmb(vm->xe);
+	}
+
+	fence = dma_fence_get_stub();
+
+	return fence;
+}
+
+static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q,
+			struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct dma_fence *fence;
+	int i;
+
+	for (i = 0; i < num_syncs; i++) {
+		fence = syncs[i].fence;
+
+		if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				       &fence->flags))
+			return false;
+	}
+	if (q) {
+		fence = xe_exec_queue_last_fence_get(q, vm);
+		if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * xe_migrate_update_pgtables() - Pipelined page-table update
+ * @m: The migrate context.
+ * @vm: The vm we'll be updating.
+ * @bo: The bo whose dma-resv we will await before updating, or NULL if userptr.
+ * @q: The exec queue to be used for the update or NULL if the default
+ * migration engine is to be used.
+ * @updates: An array of update descriptors.
+ * @num_updates: Number of descriptors in @updates.
+ * @syncs: Array of xe_sync_entry to await before updating. Note that waits
+ * will block the engine timeline.
+ * @num_syncs: Number of entries in @syncs.
+ * @pt_update: Pointer to a struct xe_migrate_pt_update, which contains
+ * pointers to callback functions and, if subclassed, private arguments to
+ * those.
+ *
+ * Perform a pipelined page-table update. The update descriptors are typically
+ * built under the same lock critical section as a call to this function. If
+ * using the default engine for the updates, they will be performed in the
+ * order they grab the job_mutex. If different engines are used, external
+ * synchronization is needed for overlapping updates to maintain page-table
+ * consistency. Note that the meaing of "overlapping" is that the updates
+ * touch the same page-table, which might be a higher-level page-directory.
+ * If no pipelining is needed, then updates may be performed by the cpu.
+ *
+ * Return: A dma_fence that, when signaled, indicates the update completion.
+ */
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+			   struct xe_vm *vm,
+			   struct xe_bo *bo,
+			   struct xe_exec_queue *q,
+			   const struct xe_vm_pgtable_update *updates,
+			   u32 num_updates,
+			   struct xe_sync_entry *syncs, u32 num_syncs,
+			   struct xe_migrate_pt_update *pt_update)
+{
+	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+	struct xe_tile *tile = m->tile;
+	struct xe_gt *gt = tile->primary_gt;
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_sched_job *job;
+	struct dma_fence *fence;
+	struct drm_suballoc *sa_bo = NULL;
+	struct xe_vma *vma = pt_update->vma;
+	struct xe_bb *bb;
+	u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0;
+	u64 addr;
+	int err = 0;
+	bool usm = !q && xe->info.has_usm;
+	bool first_munmap_rebind = vma &&
+		vma->gpuva.flags & XE_VMA_FIRST_REBIND;
+	struct xe_exec_queue *q_override = !q ? m->q : q;
+	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
+
+	/* Use the CPU if no in syncs and engine is idle */
+	if (no_in_syncs(vm, q, syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) {
+		fence =  xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
+							num_updates,
+							first_munmap_rebind,
+							pt_update);
+		if (!IS_ERR(fence) || fence == ERR_PTR(-EAGAIN))
+			return fence;
+	}
+
+	/* fixed + PTE entries */
+	if (IS_DGFX(xe))
+		batch_size = 2;
+	else
+		batch_size = 6 + num_updates * 2;
+
+	for (i = 0; i < num_updates; i++) {
+		u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff);
+
+		/* align noop + MI_STORE_DATA_IMM cmd prefix */
+		batch_size += 4 * num_cmds + updates[i].qwords * 2;
+	}
+
+	/*
+	 * XXX: Create temp bo to copy from, if batch_size becomes too big?
+	 *
+	 * Worst case: Sum(2 * (each lower level page size) + (top level page size))
+	 * Should be reasonably bound..
+	 */
+	xe_tile_assert(tile, batch_size < SZ_128K);
+
+	bb = xe_bb_new(gt, batch_size, !q && xe->info.has_usm);
+	if (IS_ERR(bb))
+		return ERR_CAST(bb);
+
+	/* For sysmem PTE's, need to map them in our hole.. */
+	if (!IS_DGFX(xe)) {
+		ppgtt_ofs = NUM_KERNEL_PDE - 1;
+		if (q) {
+			xe_tile_assert(tile, num_updates <= NUM_VMUSA_WRITES_PER_UNIT);
+
+			sa_bo = drm_suballoc_new(&m->vm_update_sa, 1,
+						 GFP_KERNEL, true, 0);
+			if (IS_ERR(sa_bo)) {
+				err = PTR_ERR(sa_bo);
+				goto err;
+			}
+
+			ppgtt_ofs = NUM_KERNEL_PDE +
+				(drm_suballoc_soffset(sa_bo) /
+				 NUM_VMUSA_UNIT_PER_PAGE);
+			page_ofs = (drm_suballoc_soffset(sa_bo) %
+				    NUM_VMUSA_UNIT_PER_PAGE) *
+				VM_SA_UPDATE_UNIT_SIZE;
+		}
+
+		/* Map our PT's to gtt */
+		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(num_updates);
+		bb->cs[bb->len++] = ppgtt_ofs * XE_PAGE_SIZE + page_ofs;
+		bb->cs[bb->len++] = 0; /* upper_32_bits */
+
+		for (i = 0; i < num_updates; i++) {
+			struct xe_bo *pt_bo = updates[i].pt_bo;
+
+			xe_tile_assert(tile, pt_bo->size == SZ_4K);
+
+			addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, pat_index, 0);
+			bb->cs[bb->len++] = lower_32_bits(addr);
+			bb->cs[bb->len++] = upper_32_bits(addr);
+		}
+
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		addr = xe_migrate_vm_addr(ppgtt_ofs, 0) +
+			(page_ofs / sizeof(u64)) * XE_PAGE_SIZE;
+		for (i = 0; i < num_updates; i++)
+			write_pgtable(tile, bb, addr + i * XE_PAGE_SIZE,
+				      &updates[i], pt_update);
+	} else {
+		/* phys pages, no preamble required */
+		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+		update_idx = bb->len;
+
+		for (i = 0; i < num_updates; i++)
+			write_pgtable(tile, bb, 0, &updates[i], pt_update);
+	}
+
+	if (!q)
+		mutex_lock(&m->job_mutex);
+
+	job = xe_bb_create_migration_job(q ?: m->q, bb,
+					 xe_migrate_batch_base(m, usm),
+					 update_idx);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto err_bb;
+	}
+
+	/* Wait on BO move */
+	if (bo) {
+		err = job_add_deps(job, bo->ttm.base.resv,
+				   DMA_RESV_USAGE_KERNEL);
+		if (err)
+			goto err_job;
+	}
+
+	/*
+	 * Munmap style VM unbind, need to wait for all jobs to be complete /
+	 * trigger preempts before moving forward
+	 */
+	if (first_munmap_rebind) {
+		err = job_add_deps(job, xe_vm_resv(vm),
+				   DMA_RESV_USAGE_BOOKKEEP);
+		if (err)
+			goto err_job;
+	}
+
+	err = xe_sched_job_last_fence_add_dep(job, vm);
+	for (i = 0; !err && i < num_syncs; i++)
+		err = xe_sync_entry_add_deps(&syncs[i], job);
+
+	if (err)
+		goto err_job;
+
+	if (ops->pre_commit) {
+		pt_update->job = job;
+		err = ops->pre_commit(pt_update);
+		if (err)
+			goto err_job;
+	}
+	xe_sched_job_arm(job);
+	fence = dma_fence_get(&job->drm.s_fence->finished);
+	xe_sched_job_push(job);
+
+	if (!q)
+		mutex_unlock(&m->job_mutex);
+
+	xe_bb_free(bb, fence);
+	drm_suballoc_free(sa_bo, fence);
+
+	return fence;
+
+err_job:
+	xe_sched_job_put(job);
+err_bb:
+	if (!q)
+		mutex_unlock(&m->job_mutex);
+	xe_bb_free(bb, NULL);
+err:
+	drm_suballoc_free(sa_bo, NULL);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_migrate_wait() - Complete all operations using the xe_migrate context
+ * @m: Migrate context to wait for.
+ *
+ * Waits until the GPU no longer uses the migrate context's default engine
+ * or its page-table objects. FIXME: What about separate page-table update
+ * engines?
+ */
+void xe_migrate_wait(struct xe_migrate *m)
+{
+	if (m->fence)
+		dma_fence_wait(m->fence, false);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_migrate.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
new file mode 100644
index 000000000000..951f19318ea4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef _XE_MIGRATE_
+#define _XE_MIGRATE_
+
+#include <drm/drm_mm.h>
+
+struct dma_fence;
+struct iosys_map;
+struct ttm_resource;
+
+struct xe_bo;
+struct xe_gt;
+struct xe_exec_queue;
+struct xe_migrate;
+struct xe_migrate_pt_update;
+struct xe_sync_entry;
+struct xe_pt;
+struct xe_tile;
+struct xe_vm;
+struct xe_vm_pgtable_update;
+struct xe_vma;
+
+/**
+ * struct xe_migrate_pt_update_ops - Callbacks for the
+ * xe_migrate_update_pgtables() function.
+ */
+struct xe_migrate_pt_update_ops {
+	/**
+	 * @populate: Populate a command buffer or page-table with ptes.
+	 * @pt_update: Embeddable callback argument.
+	 * @tile: The tile for the current operation.
+	 * @map: struct iosys_map into the memory to be populated.
+	 * @pos: If @map is NULL, map into the memory to be populated.
+	 * @ofs: qword offset into @map, unused if @map is NULL.
+	 * @num_qwords: Number of qwords to write.
+	 * @update: Information about the PTEs to be inserted.
+	 *
+	 * This interface is intended to be used as a callback into the
+	 * page-table system to populate command buffers or shared
+	 * page-tables with PTEs.
+	 */
+	void (*populate)(struct xe_migrate_pt_update *pt_update,
+			 struct xe_tile *tile, struct iosys_map *map,
+			 void *pos, u32 ofs, u32 num_qwords,
+			 const struct xe_vm_pgtable_update *update);
+
+	/**
+	 * @pre_commit: Callback to be called just before arming the
+	 * sched_job.
+	 * @pt_update: Pointer to embeddable callback argument.
+	 *
+	 * Return: 0 on success, negative error code on error.
+	 */
+	int (*pre_commit)(struct xe_migrate_pt_update *pt_update);
+};
+
+/**
+ * struct xe_migrate_pt_update - Argument to the
+ * struct xe_migrate_pt_update_ops callbacks.
+ *
+ * Intended to be subclassed to support additional arguments if necessary.
+ */
+struct xe_migrate_pt_update {
+	/** @ops: Pointer to the struct xe_migrate_pt_update_ops callbacks */
+	const struct xe_migrate_pt_update_ops *ops;
+	/** @vma: The vma we're updating the pagetable for. */
+	struct xe_vma *vma;
+	/** @job: The job if a GPU page-table update. NULL otherwise */
+	struct xe_sched_job *job;
+	/** @start: Start of update for the range fence */
+	u64 start;
+	/** @last: Last of update for the range fence */
+	u64 last;
+	/** @tile_id: Tile ID of the update */
+	u8 tile_id;
+};
+
+struct xe_migrate *xe_migrate_init(struct xe_tile *tile);
+
+struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
+				  struct xe_bo *src_bo,
+				  struct xe_bo *dst_bo,
+				  struct ttm_resource *src,
+				  struct ttm_resource *dst,
+				  bool copy_only_ccs);
+
+struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
+				   struct xe_bo *bo,
+				   struct ttm_resource *dst);
+
+struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
+
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+			   struct xe_vm *vm,
+			   struct xe_bo *bo,
+			   struct xe_exec_queue *q,
+			   const struct xe_vm_pgtable_update *updates,
+			   u32 num_updates,
+			   struct xe_sync_entry *syncs, u32 num_syncs,
+			   struct xe_migrate_pt_update *pt_update);
+
+void xe_migrate_wait(struct xe_migrate *m);
+
+struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h
new file mode 100644
index 000000000000..63c7d67b5b62
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate_doc.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MIGRATE_DOC_H_
+#define _XE_MIGRATE_DOC_H_
+
+/**
+ * DOC: Migrate Layer
+ *
+ * The XE migrate layer is used generate jobs which can copy memory (eviction),
+ * clear memory, or program tables (binds). This layer exists in every GT, has
+ * a migrate engine, and uses a special VM for all generated jobs.
+ *
+ * Special VM details
+ * ==================
+ *
+ * The special VM is configured with a page structure where we can dynamically
+ * map BOs which need to be copied and cleared, dynamically map other VM's page
+ * table BOs for updates, and identity map the entire device's VRAM with 1 GB
+ * pages.
+ *
+ * Currently the page structure consists of 32 physical pages with 16 being
+ * reserved for BO mapping during copies and clear, 1 reserved for kernel binds,
+ * several pages are needed to setup the identity mappings (exact number based
+ * on how many bits of address space the device has), and the rest are reserved
+ * user bind operations.
+ *
+ * TODO: Diagram of layout
+ *
+ * Bind jobs
+ * =========
+ *
+ * A bind job consist of two batches and runs either on the migrate engine
+ * (kernel binds) or the bind engine passed in (user binds). In both cases the
+ * VM of the engine is the migrate VM.
+ *
+ * The first batch is used to update the migration VM page structure to point to
+ * the bind VM page table BOs which need to be updated. A physical page is
+ * required for this. If it is a user bind, the page is allocated from pool of
+ * pages reserved user bind operations with drm_suballoc managing this pool. If
+ * it is a kernel bind, the page reserved for kernel binds is used.
+ *
+ * The first batch is only required for devices without VRAM as when the device
+ * has VRAM the bind VM page table BOs are in VRAM and the identity mapping can
+ * be used.
+ *
+ * The second batch is used to program page table updated in the bind VM. Why
+ * not just one batch? Well the TLBs need to be invalidated between these two
+ * batches and that only can be done from the ring.
+ *
+ * When the bind job complete, the page allocated is returned the pool of pages
+ * reserved for user bind operations if a user bind. No need do this for kernel
+ * binds as the reserved kernel page is serially used by each job.
+ *
+ * Copy / clear jobs
+ * =================
+ *
+ * A copy or clear job consist of two batches and runs on the migrate engine.
+ *
+ * Like binds, the first batch is used update the migration VM page structure.
+ * In copy jobs, we need to map the source and destination of the BO into page
+ * the structure. In clear jobs, we just need to add 1 mapping of BO into the
+ * page structure. We use the 16 reserved pages in migration VM for mappings,
+ * this gives us a maximum copy size of 16 MB and maximum clear size of 32 MB.
+ *
+ * The second batch is used do either do the copy or clear. Again similar to
+ * binds, two batches are required as the TLBs need to be invalidated from the
+ * ring between the batches.
+ *
+ * More than one job will be generated if the BO is larger than maximum copy /
+ * clear size.
+ *
+ * Future work
+ * ===========
+ *
+ * Update copy and clear code to use identity mapped VRAM.
+ *
+ * Can we rework the use of the pages async binds to use all the entries in each
+ * page?
+ *
+ * Using large pages for sysmem mappings.
+ *
+ * Is it possible to identity map the sysmem? We should explore this.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
new file mode 100644
index 000000000000..f660cfb79f50
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -0,0 +1,524 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2023 Intel Corporation
+ */
+
+#include <linux/minmax.h>
+
+#include "xe_mmio.h"
+
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_macros.h"
+#include "xe_module.h"
+#include "xe_tile.h"
+
+#define XEHP_MTCFG_ADDR		XE_REG(0x101800)
+#define TILE_COUNT		REG_GENMASK(15, 8)
+
+#define BAR_SIZE_SHIFT 20
+
+static void
+_resize_bar(struct xe_device *xe, int resno, resource_size_t size)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	int bar_size = pci_rebar_bytes_to_size(size);
+	int ret;
+
+	if (pci_resource_len(pdev, resno))
+		pci_release_resource(pdev, resno);
+
+	ret = pci_resize_resource(pdev, resno, bar_size);
+	if (ret) {
+		drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n",
+			 resno, 1 << bar_size, ERR_PTR(ret));
+		return;
+	}
+
+	drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size);
+}
+
+/*
+ * if force_vram_bar_size is set, attempt to set to the requested size
+ * else set to maximum possible size
+ */
+static void xe_resize_vram_bar(struct xe_device *xe)
+{
+	u64 force_vram_bar_size = xe_modparam.force_vram_bar_size;
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct pci_bus *root = pdev->bus;
+	resource_size_t current_size;
+	resource_size_t rebar_size;
+	struct resource *root_res;
+	u32 bar_size_mask;
+	u32 pci_cmd;
+	int i;
+
+	/* gather some relevant info */
+	current_size = pci_resource_len(pdev, LMEM_BAR);
+	bar_size_mask = pci_rebar_get_possible_sizes(pdev, LMEM_BAR);
+
+	if (!bar_size_mask)
+		return;
+
+	/* set to a specific size? */
+	if (force_vram_bar_size) {
+		u32 bar_size_bit;
+
+		rebar_size = force_vram_bar_size * (resource_size_t)SZ_1M;
+
+		bar_size_bit = bar_size_mask & BIT(pci_rebar_bytes_to_size(rebar_size));
+
+		if (!bar_size_bit) {
+			drm_info(&xe->drm,
+				 "Requested size: %lluMiB is not supported by rebar sizes: 0x%x. Leaving default: %lluMiB\n",
+				 (u64)rebar_size >> 20, bar_size_mask, (u64)current_size >> 20);
+			return;
+		}
+
+		rebar_size = 1ULL << (__fls(bar_size_bit) + BAR_SIZE_SHIFT);
+
+		if (rebar_size == current_size)
+			return;
+	} else {
+		rebar_size = 1ULL << (__fls(bar_size_mask) + BAR_SIZE_SHIFT);
+
+		/* only resize if larger than current */
+		if (rebar_size <= current_size)
+			return;
+	}
+
+	drm_info(&xe->drm, "Attempting to resize bar from %lluMiB -> %lluMiB\n",
+		 (u64)current_size >> 20, (u64)rebar_size >> 20);
+
+	while (root->parent)
+		root = root->parent;
+
+	pci_bus_for_each_resource(root, root_res, i) {
+		if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
+		    root_res->start > 0x100000000ull)
+			break;
+	}
+
+	if (!root_res) {
+		drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing. Consider enabling 'Resizable BAR' support in your BIOS\n");
+		return;
+	}
+
+	pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
+	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
+
+	_resize_bar(xe, LMEM_BAR, rebar_size);
+
+	pci_assign_unassigned_bus_resources(pdev->bus);
+	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
+}
+
+static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
+{
+	if (!pci_resource_flags(pdev, bar))
+		return false;
+
+	if (pci_resource_flags(pdev, bar) & IORESOURCE_UNSET)
+		return false;
+
+	if (!pci_resource_len(pdev, bar))
+		return false;
+
+	return true;
+}
+
+static int xe_determine_lmem_bar_size(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	if (!xe_pci_resource_valid(pdev, LMEM_BAR)) {
+		drm_err(&xe->drm, "pci resource is not valid\n");
+		return -ENXIO;
+	}
+
+	xe_resize_vram_bar(xe);
+
+	xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR);
+	xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR);
+	if (!xe->mem.vram.io_size)
+		return -EIO;
+
+	/* XXX: Need to change when xe link code is ready */
+	xe->mem.vram.dpa_base = 0;
+
+	/* set up a map to the total memory area. */
+	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
+
+	return 0;
+}
+
+/**
+ * xe_mmio_tile_vram_size() - Collect vram size and offset information
+ * @tile: tile to get info for
+ * @vram_size: available vram (size - device reserved portions)
+ * @tile_size: actual vram size
+ * @tile_offset: physical start point in the vram address space
+ *
+ * There are 4 places for size information:
+ * - io size (from pci_resource_len of LMEM bar) (only used for small bar and DG1)
+ * - TILEx size (actual vram size)
+ * - GSMBASE offset (TILEx - "stolen")
+ * - CSSBASE offset (TILEx - CSS space necessary)
+ *
+ * CSSBASE is always a lower/smaller offset then GSMBASE.
+ *
+ * The actual available size of memory is to the CCS or GSM base.
+ * NOTE: multi-tile bases will include the tile offset.
+ *
+ */
+static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size,
+				  u64 *tile_size, u64 *tile_offset)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_gt *gt = tile->primary_gt;
+	u64 offset;
+	int err;
+	u32 reg;
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		return err;
+
+	/* actual size */
+	if (unlikely(xe->info.platform == XE_DG1)) {
+		*tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), LMEM_BAR);
+		*tile_offset = 0;
+	} else {
+		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id));
+		*tile_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G;
+		*tile_offset = (u64)REG_FIELD_GET(GENMASK(7, 1), reg) * SZ_1G;
+	}
+
+	/* minus device usage */
+	if (xe->info.has_flat_ccs) {
+		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
+		offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
+	} else {
+		offset = xe_mmio_read64_2x32(gt, GSMBASE);
+	}
+
+	/* remove the tile offset so we have just the available size */
+	*vram_size = offset - *tile_offset;
+
+	return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+int xe_mmio_probe_vram(struct xe_device *xe)
+{
+	struct xe_tile *tile;
+	resource_size_t io_size;
+	u64 available_size = 0;
+	u64 total_size = 0;
+	u64 tile_offset;
+	u64 tile_size;
+	u64 vram_size;
+	int err;
+	u8 id;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	/* Get the size of the root tile's vram for later accessibility comparison */
+	tile = xe_device_get_root_tile(xe);
+	err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset);
+	if (err)
+		return err;
+
+	err = xe_determine_lmem_bar_size(xe);
+	if (err)
+		return err;
+
+	drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
+		 &xe->mem.vram.io_size);
+
+	io_size = xe->mem.vram.io_size;
+
+	/* tile specific ranges */
+	for_each_tile(tile, xe, id) {
+		err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset);
+		if (err)
+			return err;
+
+		tile->mem.vram.actual_physical_size = tile_size;
+		tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset;
+		tile->mem.vram.io_size = min_t(u64, vram_size, io_size);
+
+		if (!tile->mem.vram.io_size) {
+			drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n");
+			return -ENODEV;
+		}
+
+		tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset;
+		tile->mem.vram.usable_size = vram_size;
+		tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset;
+
+		if (tile->mem.vram.io_size < tile->mem.vram.usable_size)
+			drm_info(&xe->drm, "Small BAR device\n");
+		drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id,
+			 tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size);
+		drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id,
+			 &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + tile->mem.vram.actual_physical_size,
+			 &tile->mem.vram.io_start, tile->mem.vram.io_start + tile->mem.vram.io_size);
+
+		/* calculate total size using tile size to get the correct HW sizing */
+		total_size += tile_size;
+		available_size += vram_size;
+
+		if (total_size > xe->mem.vram.io_size) {
+			drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n",
+				 &total_size, &xe->mem.vram.io_size);
+		}
+
+		io_size -= min_t(u64, tile_size, io_size);
+	}
+
+	xe->mem.vram.actual_physical_size = total_size;
+
+	drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
+		 &xe->mem.vram.actual_physical_size);
+	drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
+		 &available_size);
+
+	return 0;
+}
+
+void xe_mmio_probe_tiles(struct xe_device *xe)
+{
+	size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
+	u8 id, tile_count = xe->info.tile_count;
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	struct xe_tile *tile;
+	void *regs;
+	u32 mtcfg;
+
+	if (tile_count == 1)
+		goto add_mmio_ext;
+
+	if (!xe->info.skip_mtcfg) {
+		mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
+		tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
+		if (tile_count < xe->info.tile_count) {
+			drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
+					xe->info.tile_count, tile_count);
+			xe->info.tile_count = tile_count;
+
+			/*
+			 * FIXME: Needs some work for standalone media, but should be impossible
+			 * with multi-tile for now.
+			 */
+			xe->info.gt_count = xe->info.tile_count;
+		}
+	}
+
+	regs = xe->mmio.regs;
+	for_each_tile(tile, xe, id) {
+		tile->mmio.size = tile_mmio_size;
+		tile->mmio.regs = regs;
+		regs += tile_mmio_size;
+	}
+
+add_mmio_ext:
+	/*
+	 * By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile).
+	 * When supported, there could be an additional contiguous multi-tile MMIO extension
+	 * space ON TOP of it, and hence the necessity for distinguished MMIO spaces.
+	 */
+	if (xe->info.has_mmio_ext) {
+		regs = xe->mmio.regs + tile_mmio_size * tile_count;
+
+		for_each_tile(tile, xe, id) {
+			tile->mmio_ext.size = tile_mmio_ext_size;
+			tile->mmio_ext.regs = regs;
+
+			regs += tile_mmio_ext_size;
+		}
+	}
+}
+
+static void mmio_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+
+	pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
+	if (xe->mem.vram.mapping)
+		iounmap(xe->mem.vram.mapping);
+}
+
+static int xe_verify_lmem_ready(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+
+	/*
+	 * The boot firmware initializes local memory and assesses its health.
+	 * If memory training fails, the punit will have been instructed to
+	 * keep the GT powered down; we won't be able to communicate with it
+	 * and we should not continue with driver initialization.
+	 */
+	if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) {
+		drm_err(&xe->drm, "VRAM not initialized by firmware\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+int xe_mmio_init(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	const int mmio_bar = 0;
+
+	/*
+	 * Map the entire BAR.
+	 * The first 16MB of the BAR, belong to the root tile, and include:
+	 * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
+	 */
+	xe->mmio.size = pci_resource_len(pdev, mmio_bar);
+	xe->mmio.regs = pci_iomap(pdev, mmio_bar, 0);
+	if (xe->mmio.regs == NULL) {
+		drm_err(&xe->drm, "failed to map registers\n");
+		return -EIO;
+	}
+
+	return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe);
+}
+
+int xe_mmio_root_tile_init(struct xe_device *xe)
+{
+	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
+	int err;
+
+	/* Setup first tile; other tiles (if present) will be setup later. */
+	root_tile->mmio.size = SZ_16M;
+	root_tile->mmio.regs = xe->mmio.regs;
+
+	err = xe_verify_lmem_ready(xe);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/**
+ * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
+ * @gt: MMIO target GT
+ * @reg: register to read value from
+ *
+ * Although Intel GPUs have some 64-bit registers, the hardware officially
+ * only supports GTTMMADR register reads of 32 bits or smaller.  Even if
+ * a readq operation may return a reasonable value, that violation of the
+ * spec shouldn't be relied upon and all 64-bit register reads should be
+ * performed as two 32-bit reads of the upper and lower dwords.
+ *
+ * When reading registers that may be changing (such as
+ * counters), a rollover of the lower dword between the two 32-bit reads
+ * can be problematic.  This function attempts to ensure the upper dword has
+ * stabilized before returning the 64-bit value.
+ *
+ * Note that because this function may re-read the register multiple times
+ * while waiting for the value to stabilize it should not be used to read
+ * any registers where read operations have side effects.
+ *
+ * Returns the value of the 64-bit register.
+ */
+u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
+{
+	struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
+	u32 ldw, udw, oldudw, retries;
+
+	if (reg.addr < gt->mmio.adj_limit) {
+		reg.addr += gt->mmio.adj_offset;
+		reg_udw.addr += gt->mmio.adj_offset;
+	}
+
+	oldudw = xe_mmio_read32(gt, reg_udw);
+	for (retries = 5; retries; --retries) {
+		ldw = xe_mmio_read32(gt, reg);
+		udw = xe_mmio_read32(gt, reg_udw);
+
+		if (udw == oldudw)
+			break;
+
+		oldudw = udw;
+	}
+
+	xe_gt_WARN(gt, retries == 0,
+		   "64-bit read of %#x did not stabilize\n", reg.addr);
+
+	return (u64)udw << 32 | ldw;
+}
+
+/**
+ * xe_mmio_wait32() - Wait for a register to match the desired masked value
+ * @gt: MMIO target GT
+ * @reg: register to read value from
+ * @mask: mask to be applied to the value read from the register
+ * @val: desired value after applying the mask
+ * @timeout_us: time out after this period of time. Wait logic tries to be
+ * smart, applying an exponential backoff until @timeout_us is reached.
+ * @out_val: if not NULL, points where to store the last unmasked value
+ * @atomic: needs to be true if calling from an atomic context
+ *
+ * This function polls for the desired masked value and returns zero on success
+ * or -ETIMEDOUT if timed out.
+ *
+ * Note that @timeout_us represents the minimum amount of time to wait before
+ * giving up. The actual time taken by this function can be a little more than
+ * @timeout_us for different reasons, specially in non-atomic contexts. Thus,
+ * it is possible that this function succeeds even after @timeout_us has passed.
+ */
+int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+		   u32 *out_val, bool atomic)
+{
+	ktime_t cur = ktime_get_raw();
+	const ktime_t end = ktime_add_us(cur, timeout_us);
+	int ret = -ETIMEDOUT;
+	s64 wait = 10;
+	u32 read;
+
+	for (;;) {
+		read = xe_mmio_read32(gt, reg);
+		if ((read & mask) == val) {
+			ret = 0;
+			break;
+		}
+
+		cur = ktime_get_raw();
+		if (!ktime_before(cur, end))
+			break;
+
+		if (ktime_after(ktime_add_us(cur, wait), end))
+			wait = ktime_us_delta(end, cur);
+
+		if (atomic)
+			udelay(wait);
+		else
+			usleep_range(wait, wait << 1);
+		wait <<= 1;
+	}
+
+	if (ret != 0) {
+		read = xe_mmio_read32(gt, reg);
+		if ((read & mask) == val)
+			ret = 0;
+	}
+
+	if (out_val)
+		*out_val = read;
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
new file mode 100644
index 000000000000..98de5c13c89b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021-2023 Intel Corporation
+ */
+
+#ifndef _XE_MMIO_H_
+#define _XE_MMIO_H_
+
+#include <linux/delay.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#include "regs/xe_reg_defs.h"
+#include "xe_device_types.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_types.h"
+
+struct drm_device;
+struct drm_file;
+struct xe_device;
+
+#define LMEM_BAR		2
+
+int xe_mmio_init(struct xe_device *xe);
+int xe_mmio_root_tile_init(struct xe_device *xe);
+void xe_mmio_probe_tiles(struct xe_device *xe);
+
+static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+
+	return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+static inline u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+
+	return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+static inline void xe_mmio_write32(struct xe_gt *gt,
+				   struct xe_reg reg, u32 val)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+
+	writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+
+	return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
+				u32 set)
+{
+	u32 old, reg_val;
+
+	old = xe_mmio_read32(gt, reg);
+	reg_val = (old & ~clr) | set;
+	xe_mmio_write32(gt, reg, reg_val);
+
+	return old;
+}
+
+static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
+					     struct xe_reg reg, u32 val,
+					     u32 mask, u32 eval)
+{
+	u32 reg_val;
+
+	xe_mmio_write32(gt, reg, val);
+	reg_val = xe_mmio_read32(gt, reg);
+
+	return (reg_val & mask) != eval ? -EINVAL : 0;
+}
+
+static inline bool xe_mmio_in_range(const struct xe_gt *gt,
+				    const struct xe_mmio_range *range,
+				    struct xe_reg reg)
+{
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+
+	return range && reg.addr >= range->start && reg.addr <= range->end;
+}
+
+int xe_mmio_probe_vram(struct xe_device *xe);
+u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
+int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
+		   u32 *out_val, bool atomic);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
new file mode 100644
index 000000000000..ef79552e4f2f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -0,0 +1,580 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_mocs.h"
+
+#include "regs/xe_gt_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "xe_step_types.h"
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+#define mocs_dbg drm_dbg
+#else
+__printf(2, 3)
+static inline void mocs_dbg(const struct drm_device *dev,
+			    const char *format, ...)
+{ /* noop */ }
+#endif
+
+enum {
+	HAS_GLOBAL_MOCS = BIT(0),
+	HAS_LNCF_MOCS = BIT(1),
+};
+
+struct xe_mocs_entry {
+	u32 control_value;
+	u16 l3cc_value;
+	u16 used;
+};
+
+struct xe_mocs_info {
+	unsigned int size;
+	unsigned int n_entries;
+	const struct xe_mocs_entry *table;
+	u8 uc_index;
+	u8 wb_index;
+	u8 unused_entries_index;
+};
+
+/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
+#define _LE_CACHEABILITY(value)	((value) << 0)
+#define _LE_TGT_CACHE(value)	((value) << 2)
+#define LE_LRUM(value)		((value) << 4)
+#define LE_AOM(value)		((value) << 6)
+#define LE_RSC(value)		((value) << 7)
+#define LE_SCC(value)		((value) << 8)
+#define LE_PFM(value)		((value) << 11)
+#define LE_SCF(value)		((value) << 14)
+#define LE_COS(value)		((value) << 15)
+#define LE_SSE(value)		((value) << 17)
+
+/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
+#define L3_ESC(value)		((value) << 0)
+#define L3_SCC(value)		((value) << 1)
+#define _L3_CACHEABILITY(value)	((value) << 4)
+#define L3_GLBGO(value)		((value) << 6)
+#define L3_LKUP(value)		((value) << 7)
+
+/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
+#define IG_PAT				REG_BIT(8)
+#define L3_CACHE_POLICY_MASK		REG_GENMASK(5, 4)
+#define L4_CACHE_POLICY_MASK		REG_GENMASK(3, 2)
+
+/* Helper defines */
+#define XELP_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
+#define PVC_NUM_MOCS_ENTRIES	3
+#define MTL_NUM_MOCS_ENTRIES    16
+#define XE2_NUM_MOCS_ENTRIES	16
+
+/* (e)LLC caching options */
+/*
+ * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means
+ * the same as LE_UC
+ */
+#define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
+#define LE_1_UC			_LE_CACHEABILITY(1)
+#define LE_2_WT			_LE_CACHEABILITY(2)
+#define LE_3_WB			_LE_CACHEABILITY(3)
+
+/* Target cache */
+#define LE_TC_0_PAGETABLE	_LE_TGT_CACHE(0)
+#define LE_TC_1_LLC		_LE_TGT_CACHE(1)
+#define LE_TC_2_LLC_ELLC	_LE_TGT_CACHE(2)
+#define LE_TC_3_LLC_ELLC_ALT	_LE_TGT_CACHE(3)
+
+/* L3 caching options */
+#define L3_0_DIRECT		_L3_CACHEABILITY(0)
+#define L3_1_UC			_L3_CACHEABILITY(1)
+#define L3_2_RESERVED		_L3_CACHEABILITY(2)
+#define L3_3_WB			_L3_CACHEABILITY(3)
+
+/* L4 caching options */
+#define L4_0_WB                 REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 0)
+#define L4_1_WT                 REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 1)
+#define L4_3_UC                 REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 3)
+
+#define XE2_L3_0_WB		REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 0)
+/* XD: WB Transient Display */
+#define XE2_L3_1_XD		REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 1)
+#define XE2_L3_3_UC		REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 3)
+
+#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
+	[__idx] = { \
+		.control_value = __control_value, \
+		.l3cc_value = __l3cc_value, \
+		.used = 1, \
+	}
+
+/*
+ * MOCS tables
+ *
+ * These are the MOCS tables that are programmed across all the rings.
+ * The control value is programmed to all the rings that support the
+ * MOCS registers. While the l3cc_values are only programmed to the
+ * LNCFCMOCS0 - LNCFCMOCS32 registers.
+ *
+ * These tables are intended to be kept reasonably consistent across
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, the list of entries is published as part of bspec.
+ *
+ * Entries not part of the following tables are undefined as far as userspace is
+ * concerned and shouldn't be relied upon. The last few entries are reserved by
+ * the hardware. They should be initialized according to bspec and never used.
+ *
+ * NOTE1: These tables are part of bspec and defined as part of the hardware
+ * interface. It is expected that, for specific hardware platform, existing
+ * entries will remain constant and the table will only be updated by adding new
+ * entries, filling unused positions.
+ *
+ * NOTE2: Reserved and unspecified MOCS indices have been set to L3 WB. These
+ * reserved entries should never be used. They may be changed to low performant
+ * variants with better coherency in the future if more entries are needed.
+ */
+
+static const struct xe_mocs_entry gen12_mocs_desc[] = {
+	/* Base - L3 + LLC */
+	MOCS_ENTRY(2,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_3_WB),
+	/* Base - Uncached */
+	MOCS_ENTRY(3,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_1_UC),
+	/* Base - L3 */
+	MOCS_ENTRY(4,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+	/* Base - LLC */
+	MOCS_ENTRY(5,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* Age 0 - LLC */
+	MOCS_ENTRY(6,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1),
+		   L3_1_UC),
+	/* Age 0 - L3 + LLC */
+	MOCS_ENTRY(7,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1),
+		   L3_3_WB),
+	/* Age: Don't Chg. - LLC */
+	MOCS_ENTRY(8,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2),
+		   L3_1_UC),
+	/* Age: Don't Chg. - L3 + LLC */
+	MOCS_ENTRY(9,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2),
+		   L3_3_WB),
+	/* No AOM - LLC */
+	MOCS_ENTRY(10,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1),
+		   L3_1_UC),
+	/* No AOM - L3 + LLC */
+	MOCS_ENTRY(11,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1),
+		   L3_3_WB),
+	/* No AOM; Age 0 - LLC */
+	MOCS_ENTRY(12,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1),
+		   L3_1_UC),
+	/* No AOM; Age 0 - L3 + LLC */
+	MOCS_ENTRY(13,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1),
+		   L3_3_WB),
+	/* No AOM; Age:DC - LLC */
+	MOCS_ENTRY(14,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1),
+		   L3_1_UC),
+	/* No AOM; Age:DC - L3 + LLC */
+	MOCS_ENTRY(15,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1),
+		   L3_3_WB),
+	/* Self-Snoop - L3 + LLC */
+	MOCS_ENTRY(18,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(12.5%) */
+	MOCS_ENTRY(19,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(25%) */
+	MOCS_ENTRY(20,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(50%) */
+	MOCS_ENTRY(21,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(75%) */
+	MOCS_ENTRY(22,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3),
+		   L3_3_WB),
+	/* Skip Caching - L3 + LLC(87.5%) */
+	MOCS_ENTRY(23,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7),
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+	MOCS_ENTRY(48,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + L3 */
+	MOCS_ENTRY(49,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + LLC */
+	MOCS_ENTRY(50,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* Implicitly enable L1 - HDC:L1 */
+	MOCS_ENTRY(51,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_1_UC),
+	/* HW Special Case (CCS) */
+	MOCS_ENTRY(60,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* HW Special Case (Displayable) */
+	MOCS_ENTRY(61,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+	/* HW Reserved - SW program but never use */
+	MOCS_ENTRY(62,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* HW Reserved - SW program but never use */
+	MOCS_ENTRY(63,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC)
+};
+
+static const struct xe_mocs_entry dg1_mocs_desc[] = {
+	/* UC */
+	MOCS_ENTRY(1, 0, L3_1_UC),
+	/* WB - L3 */
+	MOCS_ENTRY(5, 0, L3_3_WB),
+	/* WB - L3 50% */
+	MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB),
+	/* WB - L3 25% */
+	MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB),
+	/* WB - L3 12.5% */
+	MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB),
+
+	/* HDC:L1 + L3 */
+	MOCS_ENTRY(48, 0, L3_3_WB),
+	/* HDC:L1 */
+	MOCS_ENTRY(49, 0, L3_1_UC),
+
+	/* HW Reserved */
+	MOCS_ENTRY(60, 0, L3_1_UC),
+	MOCS_ENTRY(61, 0, L3_1_UC),
+	MOCS_ENTRY(62, 0, L3_1_UC),
+	MOCS_ENTRY(63, 0, L3_1_UC),
+};
+
+static const struct xe_mocs_entry dg2_mocs_desc[] = {
+	/* UC - Coherent; GO:L3 */
+	MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)),
+	/* UC - Coherent; GO:Memory */
+	MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+	/* UC - Non-Coherent; GO:Memory */
+	MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+	/* WB - LC */
+	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
+static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = {
+	/* Wa_14011441408: Set Go to Memory for MOCS#0 */
+	MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+	/* UC - Coherent; GO:Memory */
+	MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+	/* UC - Non-Coherent; GO:Memory */
+	MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+	/* WB - LC */
+	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
+static const struct xe_mocs_entry pvc_mocs_desc[] = {
+	/* Error */
+	MOCS_ENTRY(0, 0, L3_3_WB),
+
+	/* UC */
+	MOCS_ENTRY(1, 0, L3_1_UC),
+
+	/* WB */
+	MOCS_ENTRY(2, 0, L3_3_WB),
+};
+
+static const struct xe_mocs_entry mtl_mocs_desc[] = {
+	/* Error - Reserved for Non-Use */
+	MOCS_ENTRY(0,
+		   0,
+		   L3_LKUP(1) | L3_3_WB),
+	/* Cached - L3 + L4 */
+	MOCS_ENTRY(1,
+		   IG_PAT,
+		   L3_LKUP(1) | L3_3_WB),
+	/* L4 - GO:L3 */
+	MOCS_ENTRY(2,
+		   IG_PAT,
+		   L3_LKUP(1) | L3_1_UC),
+	/* Uncached - GO:L3 */
+	MOCS_ENTRY(3,
+		   IG_PAT | L4_3_UC,
+		   L3_LKUP(1) | L3_1_UC),
+	/* L4 - GO:Mem */
+	MOCS_ENTRY(4,
+		   IG_PAT,
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - GO:Mem */
+	MOCS_ENTRY(5,
+		   IG_PAT | L4_3_UC,
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(6,
+		   IG_PAT,
+		   L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(7,
+		   IG_PAT | L4_3_UC,
+		   L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(8,
+		   IG_PAT,
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(9,
+		   IG_PAT | L4_3_UC,
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Display - L3; L4:WT */
+	MOCS_ENTRY(14,
+		   IG_PAT | L4_1_WT,
+		   L3_LKUP(1) | L3_3_WB),
+	/* CCS - Non-Displayable */
+	MOCS_ENTRY(15,
+		   IG_PAT,
+		   L3_GLBGO(1) | L3_1_UC),
+};
+
+static const struct xe_mocs_entry xe2_mocs_table[] = {
+	/* Defer to PAT */
+	MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0),
+	/* Cached L3, Uncached L4 */
+	MOCS_ENTRY(1, IG_PAT | XE2_L3_0_WB | L4_3_UC, 0),
+	/* Uncached L3, Cached L4 */
+	MOCS_ENTRY(2, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0),
+	/* Uncached L3 + L4 */
+	MOCS_ENTRY(3, IG_PAT | XE2_L3_3_UC | L4_3_UC, 0),
+	/* Cached L3 + L4 */
+	MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0),
+};
+
+static unsigned int get_mocs_settings(struct xe_device *xe,
+				      struct xe_mocs_info *info)
+{
+	unsigned int flags = 0;
+
+	memset(info, 0, sizeof(struct xe_mocs_info));
+
+	switch (xe->info.platform) {
+	case XE_LUNARLAKE:
+		info->size = ARRAY_SIZE(xe2_mocs_table);
+		info->table = xe2_mocs_table;
+		info->n_entries = XE2_NUM_MOCS_ENTRIES;
+		info->uc_index = 3;
+		info->wb_index = 4;
+		info->unused_entries_index = 4;
+		break;
+	case XE_PVC:
+		info->size = ARRAY_SIZE(pvc_mocs_desc);
+		info->table = pvc_mocs_desc;
+		info->n_entries = PVC_NUM_MOCS_ENTRIES;
+		info->uc_index = 1;
+		info->wb_index = 2;
+		info->unused_entries_index = 2;
+		break;
+	case XE_METEORLAKE:
+		info->size = ARRAY_SIZE(mtl_mocs_desc);
+		info->table = mtl_mocs_desc;
+		info->n_entries = MTL_NUM_MOCS_ENTRIES;
+		info->uc_index = 9;
+		info->unused_entries_index = 1;
+		break;
+	case XE_DG2:
+		if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 &&
+		    xe->info.step.graphics >= STEP_A0 &&
+		    xe->info.step.graphics <= STEP_B0) {
+			info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax);
+			info->table = dg2_mocs_desc_g10_ax;
+		} else {
+			info->size = ARRAY_SIZE(dg2_mocs_desc);
+			info->table = dg2_mocs_desc;
+		}
+		info->uc_index = 1;
+		info->n_entries = XELP_NUM_MOCS_ENTRIES;
+		info->unused_entries_index = 3;
+		break;
+	case XE_DG1:
+		info->size = ARRAY_SIZE(dg1_mocs_desc);
+		info->table = dg1_mocs_desc;
+		info->uc_index = 1;
+		info->n_entries = XELP_NUM_MOCS_ENTRIES;
+		info->unused_entries_index = 5;
+		break;
+	case XE_TIGERLAKE:
+	case XE_ROCKETLAKE:
+	case XE_ALDERLAKE_S:
+	case XE_ALDERLAKE_P:
+	case XE_ALDERLAKE_N:
+		info->size  = ARRAY_SIZE(gen12_mocs_desc);
+		info->table = gen12_mocs_desc;
+		info->n_entries = XELP_NUM_MOCS_ENTRIES;
+		info->uc_index = 3;
+		info->unused_entries_index = 2;
+		break;
+	default:
+		drm_err(&xe->drm, "Platform that should have a MOCS table does not.\n");
+		return 0;
+	}
+
+	/*
+	 * Index 0 is a reserved/unused table entry on most platforms, but
+	 * even on those where it does represent a legitimate MOCS entry, it
+	 * never represents the "most cached, least coherent" behavior we want
+	 * to populate undefined table rows with.  So if unused_entries_index
+	 * is still 0 at this point, we'll assume that it was omitted by
+	 * mistake in the switch statement above.
+	 */
+	xe_assert(xe, info->unused_entries_index != 0);
+
+	if (XE_WARN_ON(info->size > info->n_entries)) {
+		info->table = NULL;
+		return 0;
+	}
+
+	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) >= 20)
+		flags |= HAS_GLOBAL_MOCS;
+	if (GRAPHICS_VER(xe) < 20)
+		flags |= HAS_LNCF_MOCS;
+
+	return flags;
+}
+
+/*
+ * Get control_value from MOCS entry.  If the table entry is not defined, the
+ * settings from unused_entries_index will be returned.
+ */
+static u32 get_entry_control(const struct xe_mocs_info *info,
+			     unsigned int index)
+{
+	if (index < info->size && info->table[index].used)
+		return info->table[index].control_value;
+	return info->table[info->unused_entries_index].control_value;
+}
+
+static void __init_mocs_table(struct xe_gt *gt,
+			      const struct xe_mocs_info *info)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	unsigned int i;
+	u32 mocs;
+
+	mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries);
+	drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
+		      "Unused entries index should have been defined\n");
+	for (i = 0;
+	     i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
+	     i++) {
+		mocs_dbg(&gt_to_xe(gt)->drm, "GLOB_MOCS[%d] 0x%x 0x%x\n", i,
+			 XELP_GLOBAL_MOCS(i).addr, mocs);
+
+		if (GRAPHICS_VERx100(gt_to_xe(gt)) > 1250)
+			xe_gt_mcr_multicast_write(gt, XEHP_GLOBAL_MOCS(i), mocs);
+		else
+			xe_mmio_write32(gt, XELP_GLOBAL_MOCS(i), mocs);
+	}
+}
+
+/*
+ * Get l3cc_value from MOCS entry taking into account when it's not used
+ * then if unused_entries_index is not zero then its value will be returned
+ * otherwise I915_MOCS_PTE's value is returned in this case.
+ */
+static u16 get_entry_l3cc(const struct xe_mocs_info *info,
+			  unsigned int index)
+{
+	if (index < info->size && info->table[index].used)
+		return info->table[index].l3cc_value;
+	return info->table[info->unused_entries_index].l3cc_value;
+}
+
+static u32 l3cc_combine(u16 low, u16 high)
+{
+	return low | (u32)high << 16;
+}
+
+static void init_l3cc_table(struct xe_gt *gt,
+			    const struct xe_mocs_info *info)
+{
+	unsigned int i;
+	u32 l3cc;
+
+	mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries);
+	for (i = 0;
+	     i < (info->n_entries + 1) / 2 ?
+	     (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
+				  get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
+	     i++) {
+		mocs_dbg(&gt_to_xe(gt)->drm, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr,
+			 l3cc);
+
+		if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+			xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc);
+		else
+			xe_mmio_write32(gt, XELP_LNCFCMOCS(i), l3cc);
+	}
+}
+
+void xe_mocs_init_early(struct xe_gt *gt)
+{
+	struct xe_mocs_info table;
+
+	get_mocs_settings(gt_to_xe(gt), &table);
+	gt->mocs.uc_index = table.uc_index;
+	gt->mocs.wb_index = table.wb_index;
+}
+
+void xe_mocs_init(struct xe_gt *gt)
+{
+	struct xe_mocs_info table;
+	unsigned int flags;
+
+	/*
+	 * MOCS settings are split between "GLOB_MOCS" and/or "LNCFCMOCS"
+	 * registers depending on platform.
+	 *
+	 * These registers should be programmed before GuC initialization
+	 * since their values will affect some of the memory transactions
+	 * performed by the GuC.
+	 */
+	flags = get_mocs_settings(gt_to_xe(gt), &table);
+	mocs_dbg(&gt_to_xe(gt)->drm, "flag:0x%x\n", flags);
+
+	if (flags & HAS_GLOBAL_MOCS)
+		__init_mocs_table(gt, &table);
+	if (flags & HAS_LNCF_MOCS)
+		init_l3cc_table(gt, &table);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_mocs.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h
new file mode 100644
index 000000000000..053754c5a94e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mocs.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MOCS_H_
+#define _XE_MOCS_H_
+
+#include <linux/types.h>
+
+struct xe_exec_queue;
+struct xe_gt;
+
+void xe_mocs_init_early(struct xe_gt *gt);
+void xe_mocs_init(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
new file mode 100644
index 000000000000..110b69864656
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_module.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include "xe_drv.h"
+#include "xe_hw_fence.h"
+#include "xe_pci.h"
+#include "xe_sched_job.h"
+
+struct xe_modparam xe_modparam = {
+	.enable_display = true,
+	.guc_log_level = 5,
+	.force_probe = CONFIG_DRM_XE_FORCE_PROBE,
+	/* the rest are 0 by default */
+};
+
+module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
+MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
+
+module_param_named(enable_display, xe_modparam.enable_display, bool, 0444);
+MODULE_PARM_DESC(enable_display, "Enable display");
+
+module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600);
+MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)");
+
+module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600);
+MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)");
+
+module_param_named_unsafe(guc_firmware_path, xe_modparam.guc_firmware_path, charp, 0400);
+MODULE_PARM_DESC(guc_firmware_path,
+		 "GuC firmware path to use instead of the default one");
+
+module_param_named_unsafe(huc_firmware_path, xe_modparam.huc_firmware_path, charp, 0400);
+MODULE_PARM_DESC(huc_firmware_path,
+		 "HuC firmware path to use instead of the default one - empty string disables");
+
+module_param_named_unsafe(gsc_firmware_path, xe_modparam.gsc_firmware_path, charp, 0400);
+MODULE_PARM_DESC(gsc_firmware_path,
+		 "GSC firmware path to use instead of the default one - empty string disables");
+
+module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400);
+MODULE_PARM_DESC(force_probe,
+		 "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details.");
+
+struct init_funcs {
+	int (*init)(void);
+	void (*exit)(void);
+};
+
+static const struct init_funcs init_funcs[] = {
+	{
+		.init = xe_hw_fence_module_init,
+		.exit = xe_hw_fence_module_exit,
+	},
+	{
+		.init = xe_sched_job_module_init,
+		.exit = xe_sched_job_module_exit,
+	},
+	{
+		.init = xe_register_pci_driver,
+		.exit = xe_unregister_pci_driver,
+	},
+};
+
+static int __init xe_init(void)
+{
+	int err, i;
+
+	for (i = 0; i < ARRAY_SIZE(init_funcs); i++) {
+		err = init_funcs[i].init();
+		if (err) {
+			while (i--)
+				init_funcs[i].exit();
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static void __exit xe_exit(void)
+{
+	int i;
+
+	for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--)
+		init_funcs[i].exit();
+}
+
+module_init(xe_init);
+module_exit(xe_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
new file mode 100644
index 000000000000..88ef0e8b2bfd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MODULE_H_
+#define _XE_MODULE_H_
+
+#include <linux/types.h>
+
+/* Module modprobe variables */
+struct xe_modparam {
+	bool force_execlist;
+	bool enable_display;
+	u32 force_vram_bar_size;
+	int guc_log_level;
+	char *guc_firmware_path;
+	char *huc_firmware_path;
+	char *gsc_firmware_path;
+	char *force_probe;
+};
+
+extern struct xe_modparam xe_modparam;
+
+#endif
+
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
new file mode 100644
index 000000000000..1ff6bc79e7d4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_pat.h"
+
+#include <drm/xe_drm.h>
+
+#include "regs/xe_reg_defs.h"
+#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_mmio.h"
+
+#define _PAT_ATS				0x47fc
+#define _PAT_INDEX(index)			_PICK_EVEN_2RANGES(index, 8, \
+								   0x4800, 0x4804, \
+								   0x4848, 0x484c)
+#define _PAT_PTA				0x4820
+
+#define XE2_NO_PROMOTE				REG_BIT(10)
+#define XE2_COMP_EN				REG_BIT(9)
+#define XE2_L3_CLOS				REG_GENMASK(7, 6)
+#define XE2_L3_POLICY				REG_GENMASK(5, 4)
+#define XE2_L4_POLICY				REG_GENMASK(3, 2)
+#define XE2_COH_MODE				REG_GENMASK(1, 0)
+
+#define XELPG_L4_POLICY_MASK			REG_GENMASK(3, 2)
+#define XELPG_PAT_3_UC				REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 3)
+#define XELPG_PAT_1_WT				REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 1)
+#define XELPG_PAT_0_WB				REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 0)
+#define XELPG_INDEX_COH_MODE_MASK		REG_GENMASK(1, 0)
+#define XELPG_3_COH_2W				REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 3)
+#define XELPG_2_COH_1W				REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 2)
+#define XELPG_0_COH_NON				REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 0)
+
+#define XEHPC_CLOS_LEVEL_MASK			REG_GENMASK(3, 2)
+#define XEHPC_PAT_CLOS(x)			REG_FIELD_PREP(XEHPC_CLOS_LEVEL_MASK, x)
+
+#define XELP_MEM_TYPE_MASK			REG_GENMASK(1, 0)
+#define XELP_PAT_WB				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 3)
+#define XELP_PAT_WT				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 2)
+#define XELP_PAT_WC				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 1)
+#define XELP_PAT_UC				REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 0)
+
+static const char *XELP_MEM_TYPE_STR_MAP[] = { "UC", "WC", "WT", "WB" };
+
+struct xe_pat_ops {
+	void (*program_graphics)(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+				 int n_entries);
+	void (*program_media)(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			      int n_entries);
+	void (*dump)(struct xe_gt *gt, struct drm_printer *p);
+};
+
+static const struct xe_pat_table_entry xelp_pat_table[] = {
+	[0] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
+	[1] = { XELP_PAT_WC, XE_COH_NONE },
+	[2] = { XELP_PAT_WT, XE_COH_NONE },
+	[3] = { XELP_PAT_UC, XE_COH_NONE },
+};
+
+static const struct xe_pat_table_entry xehpc_pat_table[] = {
+	[0] = { XELP_PAT_UC, XE_COH_NONE },
+	[1] = { XELP_PAT_WC, XE_COH_NONE },
+	[2] = { XELP_PAT_WT, XE_COH_NONE },
+	[3] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
+	[4] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WT, XE_COH_NONE },
+	[5] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
+	[6] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WT, XE_COH_NONE },
+	[7] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
+};
+
+static const struct xe_pat_table_entry xelpg_pat_table[] = {
+	[0] = { XELPG_PAT_0_WB, XE_COH_NONE },
+	[1] = { XELPG_PAT_1_WT, XE_COH_NONE },
+	[2] = { XELPG_PAT_3_UC, XE_COH_NONE },
+	[3] = { XELPG_PAT_0_WB | XELPG_2_COH_1W, XE_COH_AT_LEAST_1WAY },
+	[4] = { XELPG_PAT_0_WB | XELPG_3_COH_2W, XE_COH_AT_LEAST_1WAY },
+};
+
+/*
+ * The Xe2 table is getting large/complicated so it's easier to review if
+ * provided in a form that exactly matches the bspec's formatting.  The meaning
+ * of the fields here are:
+ *   - no_promote:  0=promotable, 1=no promote
+ *   - comp_en:     0=disable, 1=enable
+ *   - l3clos:      L3 class of service (0-3)
+ *   - l3_policy:   0=WB, 1=XD ("WB - Transient Display"), 3=UC
+ *   - l4_policy:   0=WB, 1=WT, 3=UC
+ *   - coh_mode:    0=no snoop, 2=1-way coherent, 3=2-way coherent
+ *
+ * Reserved entries should be programmed with the maximum caching, minimum
+ * coherency (which matches an all-0's encoding), so we can just omit them
+ * in the table.
+ */
+#define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \
+	{ \
+		.value = (no_promote ? XE2_NO_PROMOTE : 0) | \
+			(comp_en ? XE2_COMP_EN : 0) | \
+			REG_FIELD_PREP(XE2_L3_CLOS, l3clos) | \
+			REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \
+			REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \
+			REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \
+		.coh_mode = __coh_mode ? XE_COH_AT_LEAST_1WAY : XE_COH_NONE \
+	}
+
+static const struct xe_pat_table_entry xe2_pat_table[] = {
+	[ 0] = XE2_PAT( 0, 0, 0, 0, 3, 0 ),
+	[ 1] = XE2_PAT( 0, 0, 0, 0, 3, 2 ),
+	[ 2] = XE2_PAT( 0, 0, 0, 0, 3, 3 ),
+	[ 3] = XE2_PAT( 0, 0, 0, 3, 3, 0 ),
+	[ 4] = XE2_PAT( 0, 0, 0, 3, 0, 2 ),
+	[ 5] = XE2_PAT( 0, 0, 0, 3, 3, 2 ),
+	[ 6] = XE2_PAT( 1, 0, 0, 1, 3, 0 ),
+	[ 7] = XE2_PAT( 0, 0, 0, 3, 0, 3 ),
+	[ 8] = XE2_PAT( 0, 0, 0, 3, 0, 0 ),
+	[ 9] = XE2_PAT( 0, 1, 0, 0, 3, 0 ),
+	[10] = XE2_PAT( 0, 1, 0, 3, 0, 0 ),
+	[11] = XE2_PAT( 1, 1, 0, 1, 3, 0 ),
+	[12] = XE2_PAT( 0, 1, 0, 3, 3, 0 ),
+	[13] = XE2_PAT( 0, 0, 0, 0, 0, 0 ),
+	[14] = XE2_PAT( 0, 1, 0, 0, 0, 0 ),
+	[15] = XE2_PAT( 1, 1, 0, 1, 1, 0 ),
+	/* 16..19 are reserved; leave set to all 0's */
+	[20] = XE2_PAT( 0, 0, 1, 0, 3, 0 ),
+	[21] = XE2_PAT( 0, 1, 1, 0, 3, 0 ),
+	[22] = XE2_PAT( 0, 0, 1, 0, 3, 2 ),
+	[23] = XE2_PAT( 0, 0, 1, 0, 3, 3 ),
+	[24] = XE2_PAT( 0, 0, 2, 0, 3, 0 ),
+	[25] = XE2_PAT( 0, 1, 2, 0, 3, 0 ),
+	[26] = XE2_PAT( 0, 0, 2, 0, 3, 2 ),
+	[27] = XE2_PAT( 0, 0, 2, 0, 3, 3 ),
+	[28] = XE2_PAT( 0, 0, 3, 0, 3, 0 ),
+	[29] = XE2_PAT( 0, 1, 3, 0, 3, 0 ),
+	[30] = XE2_PAT( 0, 0, 3, 0, 3, 2 ),
+	[31] = XE2_PAT( 0, 0, 3, 0, 3, 3 ),
+};
+
+/* Special PAT values programmed outside the main table */
+static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 0, 0, 3, 3 );
+
+u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index)
+{
+	WARN_ON(pat_index >= xe->pat.n_entries);
+	return xe->pat.table[pat_index].coh_mode;
+}
+
+static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			int n_entries)
+{
+	for (int i = 0; i < n_entries; i++) {
+		struct xe_reg reg = XE_REG(_PAT_INDEX(i));
+
+		xe_mmio_write32(gt, reg, table[i].value);
+	}
+}
+
+static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			    int n_entries)
+{
+	for (int i = 0; i < n_entries; i++) {
+		struct xe_reg_mcr reg_mcr = XE_REG_MCR(_PAT_INDEX(i));
+
+		xe_gt_mcr_multicast_write(gt, reg_mcr, table[i].value);
+	}
+}
+
+static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_fw;
+
+	drm_printf(p, "PAT table:\n");
+
+	for (i = 0; i < xe->pat.n_entries; i++) {
+		u32 pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+		u8 mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat);
+
+		drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i,
+			   XELP_MEM_TYPE_STR_MAP[mem_type], pat);
+	}
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_fw:
+	xe_assert(xe, !err);
+	xe_device_mem_access_put(xe);
+}
+
+static const struct xe_pat_ops xelp_pat_ops = {
+	.program_graphics = program_pat,
+	.dump = xelp_dump,
+};
+
+static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_fw;
+
+	drm_printf(p, "PAT table:\n");
+
+	for (i = 0; i < xe->pat.n_entries; i++) {
+		u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
+		u8 mem_type;
+
+		mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat);
+
+		drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i,
+			   XELP_MEM_TYPE_STR_MAP[mem_type], pat);
+	}
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_fw:
+	xe_assert(xe, !err);
+	xe_device_mem_access_put(xe);
+}
+
+static const struct xe_pat_ops xehp_pat_ops = {
+	.program_graphics = program_pat_mcr,
+	.dump = xehp_dump,
+};
+
+static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_fw;
+
+	drm_printf(p, "PAT table:\n");
+
+	for (i = 0; i < xe->pat.n_entries; i++) {
+		u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
+
+		drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", i,
+			   REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat),
+			   REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat);
+	}
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_fw:
+	xe_assert(xe, !err);
+	xe_device_mem_access_put(xe);
+}
+
+static const struct xe_pat_ops xehpc_pat_ops = {
+	.program_graphics = program_pat_mcr,
+	.dump = xehpc_dump,
+};
+
+static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, err;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_fw;
+
+	drm_printf(p, "PAT table:\n");
+
+	for (i = 0; i < xe->pat.n_entries; i++) {
+		u32 pat;
+
+		if (xe_gt_is_media_type(gt))
+			pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+		else
+			pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
+
+		drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", i,
+			   REG_FIELD_GET(XELPG_L4_POLICY_MASK, pat),
+			   REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, pat), pat);
+	}
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_fw:
+	xe_assert(xe, !err);
+	xe_device_mem_access_put(xe);
+}
+
+/*
+ * SAMedia register offsets are adjusted by the write methods and they target
+ * registers that are not MCR, while for normal GT they are MCR
+ */
+static const struct xe_pat_ops xelpg_pat_ops = {
+	.program_graphics = program_pat,
+	.program_media = program_pat_mcr,
+	.dump = xelpg_dump,
+};
+
+static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			       int n_entries)
+{
+	program_pat_mcr(gt, table, n_entries);
+	xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value);
+}
+
+static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
+			       int n_entries)
+{
+	program_pat(gt, table, n_entries);
+	xe_mmio_write32(gt, XE_REG(_PAT_ATS), xe2_pat_ats.value);
+}
+
+static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int i, err;
+	u32 pat;
+
+	xe_device_mem_access_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto err_fw;
+
+	drm_printf(p, "PAT table:\n");
+
+	for (i = 0; i < xe->pat.n_entries; i++) {
+		if (xe_gt_is_media_type(gt))
+			pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i)));
+		else
+			pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
+
+		drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ]  (%#8x)\n", i,
+			   !!(pat & XE2_NO_PROMOTE),
+			   !!(pat & XE2_COMP_EN),
+			   REG_FIELD_GET(XE2_L3_CLOS, pat),
+			   REG_FIELD_GET(XE2_L3_POLICY, pat),
+			   REG_FIELD_GET(XE2_L4_POLICY, pat),
+			   REG_FIELD_GET(XE2_COH_MODE, pat),
+			   pat);
+	}
+
+	/*
+	 * Also print PTA_MODE, which describes how the hardware accesses
+	 * PPGTT entries.
+	 */
+	if (xe_gt_is_media_type(gt))
+		pat = xe_mmio_read32(gt, XE_REG(_PAT_PTA));
+	else
+		pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA));
+
+	drm_printf(p, "Page Table Access:\n");
+	drm_printf(p, "PTA_MODE= [ %u, %u, %u, %u, %u, %u ]  (%#8x)\n",
+		   !!(pat & XE2_NO_PROMOTE),
+		   !!(pat & XE2_COMP_EN),
+		   REG_FIELD_GET(XE2_L3_CLOS, pat),
+		   REG_FIELD_GET(XE2_L3_POLICY, pat),
+		   REG_FIELD_GET(XE2_L4_POLICY, pat),
+		   REG_FIELD_GET(XE2_COH_MODE, pat),
+		   pat);
+
+	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_fw:
+	xe_assert(xe, !err);
+	xe_device_mem_access_put(xe);
+}
+
+static const struct xe_pat_ops xe2_pat_ops = {
+	.program_graphics = xe2lpg_program_pat,
+	.program_media = xe2lpm_program_pat,
+	.dump = xe2_dump,
+};
+
+void xe_pat_init_early(struct xe_device *xe)
+{
+	if (GRAPHICS_VER(xe) == 20) {
+		xe->pat.ops = &xe2_pat_ops;
+		xe->pat.table = xe2_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 3;
+		xe->pat.idx[XE_CACHE_WT] = 15;
+		xe->pat.idx[XE_CACHE_WB] = 2;
+		xe->pat.idx[XE_CACHE_NONE_COMPRESSION] = 12; /*Applicable on xe2 and beyond */
+	} else if (xe->info.platform == XE_METEORLAKE) {
+		xe->pat.ops = &xelpg_pat_ops;
+		xe->pat.table = xelpg_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xelpg_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 2;
+		xe->pat.idx[XE_CACHE_WT] = 1;
+		xe->pat.idx[XE_CACHE_WB] = 3;
+	} else if (xe->info.platform == XE_PVC) {
+		xe->pat.ops = &xehpc_pat_ops;
+		xe->pat.table = xehpc_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xehpc_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 0;
+		xe->pat.idx[XE_CACHE_WT] = 2;
+		xe->pat.idx[XE_CACHE_WB] = 3;
+	} else if (xe->info.platform == XE_DG2) {
+		/*
+		 * Table is the same as previous platforms, but programming
+		 * method has changed.
+		 */
+		xe->pat.ops = &xehp_pat_ops;
+		xe->pat.table = xelp_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 3;
+		xe->pat.idx[XE_CACHE_WT] = 2;
+		xe->pat.idx[XE_CACHE_WB] = 0;
+	} else if (GRAPHICS_VERx100(xe) <= 1210) {
+		WARN_ON_ONCE(!IS_DGFX(xe) && !xe->info.has_llc);
+		xe->pat.ops = &xelp_pat_ops;
+		xe->pat.table = xelp_pat_table;
+		xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table);
+		xe->pat.idx[XE_CACHE_NONE] = 3;
+		xe->pat.idx[XE_CACHE_WT] = 2;
+		xe->pat.idx[XE_CACHE_WB] = 0;
+	} else {
+		/*
+		 * Going forward we expect to need new PAT settings for most
+		 * new platforms; failure to provide a new table can easily
+		 * lead to subtle, hard-to-debug problems.  If none of the
+		 * conditions above match the platform we're running on we'll
+		 * raise an error rather than trying to silently inherit the
+		 * most recent platform's behavior.
+		 */
+		drm_err(&xe->drm, "Missing PAT table for platform with graphics version %d.%02d!\n",
+			GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
+	}
+}
+
+void xe_pat_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (!xe->pat.ops)
+		return;
+
+	if (xe_gt_is_media_type(gt))
+		xe->pat.ops->program_media(gt, xe->pat.table, xe->pat.n_entries);
+	else
+		xe->pat.ops->program_graphics(gt, xe->pat.table, xe->pat.n_entries);
+}
+
+void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (!xe->pat.ops->dump)
+		return;
+
+	xe->pat.ops->dump(gt, p);
+}
diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
new file mode 100644
index 000000000000..fa0dfbe525cd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pat.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PAT_H_
+#define _XE_PAT_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_device;
+struct xe_gt;
+
+/**
+ * struct xe_pat_table_entry - The pat_index encoding and other meta information.
+ */
+struct xe_pat_table_entry {
+	/**
+	 * @value: The platform specific value encoding the various memory
+	 * attributes (this maps to some fixed pat_index). So things like
+	 * caching, coherency, compression etc can be encoded here.
+	 */
+	u32 value;
+
+	/**
+	 * @coh_mode: The GPU coherency mode that @value maps to.
+	 */
+#define XE_COH_NONE          1
+#define XE_COH_AT_LEAST_1WAY 2
+	u16 coh_mode;
+};
+
+/**
+ * xe_pat_init_early - SW initialization, setting up data based on device
+ * @xe: xe device
+ */
+void xe_pat_init_early(struct xe_device *xe);
+
+/**
+ * xe_pat_init - Program HW PAT table
+ * @gt: GT structure
+ */
+void xe_pat_init(struct xe_gt *gt);
+
+/**
+ * xe_pat_dump - Dump PAT table
+ * @gt: GT structure
+ * @p: Printer to dump info to
+ */
+void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p);
+
+/**
+ * xe_pat_index_get_coh_mode - Extract the coherency mode for the given
+ * pat_index.
+ * @xe: xe device
+ * @pat_index: The pat_index to query
+ */
+u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
new file mode 100644
index 000000000000..dcc5ded1558e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -0,0 +1,951 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_pci.h"
+
+#include <kunit/static_stub.h>
+#include <linux/device/driver.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_color_mgmt.h>
+#include <drm/drm_drv.h>
+#include <drm/xe_pciids.h>
+
+#include "regs/xe_gt_regs.h"
+#include "xe_device.h"
+#include "xe_display.h"
+#include "xe_drv.h"
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_module.h"
+#include "xe_pci_types.h"
+#include "xe_pm.h"
+#include "xe_sriov.h"
+#include "xe_step.h"
+#include "xe_tile.h"
+
+enum toggle_d3cold {
+	D3COLD_DISABLE,
+	D3COLD_ENABLE,
+};
+
+struct xe_subplatform_desc {
+	enum xe_subplatform subplatform;
+	const char *name;
+	const u16 *pciidlist;
+};
+
+struct xe_gt_desc {
+	enum xe_gt_type type;
+	u32 mmio_adj_limit;
+	u32 mmio_adj_offset;
+};
+
+struct xe_device_desc {
+	/* Should only ever be set for platforms without GMD_ID */
+	const struct xe_graphics_desc *graphics;
+	/* Should only ever be set for platforms without GMD_ID */
+	const struct xe_media_desc *media;
+
+	const char *platform_name;
+	const struct xe_subplatform_desc *subplatforms;
+
+	enum xe_platform platform;
+
+	u8 require_force_probe:1;
+	u8 is_dgfx:1;
+
+	u8 has_display:1;
+	u8 has_heci_gscfi:1;
+	u8 has_llc:1;
+	u8 has_mmio_ext:1;
+	u8 has_sriov:1;
+	u8 skip_guc_pc:1;
+	u8 skip_mtcfg:1;
+	u8 skip_pcode:1;
+};
+
+__diag_push();
+__diag_ignore_all("-Woverride-init", "Allow field overrides in table");
+
+#define PLATFORM(x)		\
+	.platform = (x),	\
+	.platform_name = #x
+
+#define NOP(x)	x
+
+static const struct xe_graphics_desc graphics_xelp = {
+	.name = "Xe_LP",
+	.ver = 12,
+	.rel = 0,
+
+	.hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0),
+
+	.dma_mask_size = 39,
+	.va_bits = 48,
+	.vm_max_level = 3,
+};
+
+static const struct xe_graphics_desc graphics_xelpp = {
+	.name = "Xe_LP+",
+	.ver = 12,
+	.rel = 10,
+
+	.hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0),
+
+	.dma_mask_size = 39,
+	.va_bits = 48,
+	.vm_max_level = 3,
+};
+
+#define XE_HP_FEATURES \
+	.has_range_tlb_invalidation = true, \
+	.has_flat_ccs = true, \
+	.dma_mask_size = 46, \
+	.va_bits = 48, \
+	.vm_max_level = 3
+
+static const struct xe_graphics_desc graphics_xehpg = {
+	.name = "Xe_HPG",
+	.ver = 12,
+	.rel = 55,
+
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) |
+		BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3),
+
+	XE_HP_FEATURES,
+	.vram_flags = XE_VRAM_FLAGS_NEED64K,
+};
+
+static const struct xe_graphics_desc graphics_xehpc = {
+	.name = "Xe_HPC",
+	.ver = 12,
+	.rel = 60,
+
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_BCS1) |
+		BIT(XE_HW_ENGINE_BCS2) | BIT(XE_HW_ENGINE_BCS3) |
+		BIT(XE_HW_ENGINE_BCS4) | BIT(XE_HW_ENGINE_BCS5) |
+		BIT(XE_HW_ENGINE_BCS6) | BIT(XE_HW_ENGINE_BCS7) |
+		BIT(XE_HW_ENGINE_BCS8) |
+		BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) |
+		BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3),
+
+	XE_HP_FEATURES,
+	.dma_mask_size = 52,
+	.max_remote_tiles = 1,
+	.va_bits = 57,
+	.vm_max_level = 4,
+	.vram_flags = XE_VRAM_FLAGS_NEED64K,
+
+	.has_asid = 1,
+	.has_flat_ccs = 0,
+	.has_usm = 1,
+};
+
+static const struct xe_graphics_desc graphics_xelpg = {
+	.name = "Xe_LPG",
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+		BIT(XE_HW_ENGINE_CCS0),
+
+	XE_HP_FEATURES,
+	.has_flat_ccs = 0,
+};
+
+#define XE2_GFX_FEATURES \
+	.dma_mask_size = 46, \
+	.has_asid = 1, \
+	.has_flat_ccs = 1, \
+	.has_range_tlb_invalidation = 1, \
+	.has_usm = 0 /* FIXME: implementation missing */, \
+	.va_bits = 48, \
+	.vm_max_level = 4, \
+	.hw_engine_mask = \
+		BIT(XE_HW_ENGINE_RCS0) | \
+		BIT(XE_HW_ENGINE_BCS8) | BIT(XE_HW_ENGINE_BCS0) | \
+		GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)
+
+static const struct xe_graphics_desc graphics_xe2 = {
+	.name = "Xe2_LPG",
+
+	XE2_GFX_FEATURES,
+};
+
+static const struct xe_media_desc media_xem = {
+	.name = "Xe_M",
+	.ver = 12,
+	.rel = 0,
+
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
+		BIT(XE_HW_ENGINE_VECS0),
+};
+
+static const struct xe_media_desc media_xehpm = {
+	.name = "Xe_HPM",
+	.ver = 12,
+	.rel = 55,
+
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1),
+};
+
+static const struct xe_media_desc media_xelpmp = {
+	.name = "Xe_LPM+",
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
+		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_GSCCS0)
+};
+
+static const struct xe_media_desc media_xe2 = {
+	.name = "Xe2_LPM",
+	.hw_engine_mask =
+		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0), /* TODO: GSC0 */
+};
+
+static const struct xe_device_desc tgl_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
+	PLATFORM(XE_TIGERLAKE),
+	.has_display = true,
+	.has_llc = true,
+	.require_force_probe = true,
+};
+
+static const struct xe_device_desc rkl_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
+	PLATFORM(XE_ROCKETLAKE),
+	.has_display = true,
+	.has_llc = true,
+	.require_force_probe = true,
+};
+
+static const u16 adls_rpls_ids[] = { XE_RPLS_IDS(NOP), 0 };
+
+static const struct xe_device_desc adl_s_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
+	PLATFORM(XE_ALDERLAKE_S),
+	.has_display = true,
+	.has_llc = true,
+	.require_force_probe = true,
+	.subplatforms = (const struct xe_subplatform_desc[]) {
+		{ XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids },
+		{},
+	},
+};
+
+static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 };
+
+static const struct xe_device_desc adl_p_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
+	PLATFORM(XE_ALDERLAKE_P),
+	.has_display = true,
+	.has_llc = true,
+	.require_force_probe = true,
+	.subplatforms = (const struct xe_subplatform_desc[]) {
+		{ XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids },
+		{},
+	},
+};
+
+static const struct xe_device_desc adl_n_desc = {
+	.graphics = &graphics_xelp,
+	.media = &media_xem,
+	PLATFORM(XE_ALDERLAKE_N),
+	.has_display = true,
+	.has_llc = true,
+	.require_force_probe = true,
+};
+
+#define DGFX_FEATURES \
+	.is_dgfx = 1
+
+static const struct xe_device_desc dg1_desc = {
+	.graphics = &graphics_xelpp,
+	.media = &media_xem,
+	DGFX_FEATURES,
+	PLATFORM(XE_DG1),
+	.has_display = true,
+	.has_heci_gscfi = 1,
+	.require_force_probe = true,
+};
+
+static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 };
+static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 };
+static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
+
+#define DG2_FEATURES \
+	DGFX_FEATURES, \
+	PLATFORM(XE_DG2), \
+	.has_heci_gscfi = 1, \
+	.subplatforms = (const struct xe_subplatform_desc[]) { \
+		{ XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \
+		{ XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \
+		{ XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \
+		{ } \
+	}
+
+static const struct xe_device_desc ats_m_desc = {
+	.graphics = &graphics_xehpg,
+	.media = &media_xehpm,
+	.require_force_probe = true,
+
+	DG2_FEATURES,
+	.has_display = false,
+};
+
+static const struct xe_device_desc dg2_desc = {
+	.graphics = &graphics_xehpg,
+	.media = &media_xehpm,
+	.require_force_probe = true,
+
+	DG2_FEATURES,
+	.has_display = true,
+};
+
+static const __maybe_unused struct xe_device_desc pvc_desc = {
+	.graphics = &graphics_xehpc,
+	DGFX_FEATURES,
+	PLATFORM(XE_PVC),
+	.has_display = false,
+	.has_heci_gscfi = 1,
+	.require_force_probe = true,
+};
+
+static const struct xe_device_desc mtl_desc = {
+	/* .graphics and .media determined via GMD_ID */
+	.require_force_probe = true,
+	PLATFORM(XE_METEORLAKE),
+	.has_display = true,
+};
+
+static const struct xe_device_desc lnl_desc = {
+	PLATFORM(XE_LUNARLAKE),
+	.require_force_probe = true,
+};
+
+#undef PLATFORM
+__diag_pop();
+
+/* Map of GMD_ID values to graphics IP */
+static struct gmdid_map graphics_ip_map[] = {
+	{ 1270, &graphics_xelpg },
+	{ 1271, &graphics_xelpg },
+	{ 2004, &graphics_xe2 },
+};
+
+/* Map of GMD_ID values to media IP */
+static struct gmdid_map media_ip_map[] = {
+	{ 1300, &media_xelpmp },
+	{ 2000, &media_xe2 },
+};
+
+#define INTEL_VGA_DEVICE(id, info) {			\
+	PCI_DEVICE(PCI_VENDOR_ID_INTEL, id),		\
+	PCI_BASE_CLASS_DISPLAY << 16, 0xff << 16,	\
+	(unsigned long) info }
+
+/*
+ * Make sure any device matches here are from most specific to most
+ * general.  For example, since the Quanta match is based on the subsystem
+ * and subvendor IDs, we need it to come before the more general IVB
+ * PCI ID matches, otherwise we'll use the wrong info struct above.
+ */
+static const struct pci_device_id pciidlist[] = {
+	XE_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc),
+	XE_RKL_IDS(INTEL_VGA_DEVICE, &rkl_desc),
+	XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
+	XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+	XE_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc),
+	XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+	XE_RPLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
+	XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
+	XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
+	XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
+	XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
+	XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc),
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, pciidlist);
+
+#undef INTEL_VGA_DEVICE
+
+/* is device_id present in comma separated list of ids */
+static bool device_id_in_list(u16 device_id, const char *devices, bool negative)
+{
+	char *s, *p, *tok;
+	bool ret;
+
+	if (!devices || !*devices)
+		return false;
+
+	/* match everything */
+	if (negative && strcmp(devices, "!*") == 0)
+		return true;
+	if (!negative && strcmp(devices, "*") == 0)
+		return true;
+
+	s = kstrdup(devices, GFP_KERNEL);
+	if (!s)
+		return false;
+
+	for (p = s, ret = false; (tok = strsep(&p, ",")) != NULL; ) {
+		u16 val;
+
+		if (negative && tok[0] == '!')
+			tok++;
+		else if ((negative && tok[0] != '!') ||
+			 (!negative && tok[0] == '!'))
+			continue;
+
+		if (kstrtou16(tok, 16, &val) == 0 && val == device_id) {
+			ret = true;
+			break;
+		}
+	}
+
+	kfree(s);
+
+	return ret;
+}
+
+static bool id_forced(u16 device_id)
+{
+	return device_id_in_list(device_id, xe_modparam.force_probe, false);
+}
+
+static bool id_blocked(u16 device_id)
+{
+	return device_id_in_list(device_id, xe_modparam.force_probe, true);
+}
+
+static const struct xe_subplatform_desc *
+find_subplatform(const struct xe_device *xe, const struct xe_device_desc *desc)
+{
+	const struct xe_subplatform_desc *sp;
+	const u16 *id;
+
+	for (sp = desc->subplatforms; sp && sp->subplatform; sp++)
+		for (id = sp->pciidlist; *id; id++)
+			if (*id == xe->info.devid)
+				return sp;
+
+	return NULL;
+}
+
+enum xe_gmdid_type {
+	GMDID_GRAPHICS,
+	GMDID_MEDIA
+};
+
+static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	struct xe_reg gmdid_reg = GMD_ID;
+	u32 val;
+
+	KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid);
+
+	if (type == GMDID_MEDIA)
+		gmdid_reg.addr += MEDIA_GT_GSI_OFFSET;
+
+	val = xe_mmio_read32(gt, gmdid_reg);
+	*ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + REG_FIELD_GET(GMD_ID_RELEASE_MASK, val);
+	*revid = REG_FIELD_GET(GMD_ID_REVID, val);
+}
+
+/*
+ * Pre-GMD_ID platform: device descriptor already points to the appropriate
+ * graphics descriptor. Simply forward the description and calculate the version
+ * appropriately. "graphics" should be present in all such platforms, while
+ * media is optional.
+ */
+static void handle_pre_gmdid(struct xe_device *xe,
+			     const struct xe_graphics_desc *graphics,
+			     const struct xe_media_desc *media)
+{
+	xe->info.graphics_verx100 = graphics->ver * 100 + graphics->rel;
+
+	if (media)
+		xe->info.media_verx100 = media->ver * 100 + media->rel;
+
+}
+
+/*
+ * GMD_ID platform: read IP version from hardware and select graphics descriptor
+ * based on the result.
+ */
+static void handle_gmdid(struct xe_device *xe,
+			 const struct xe_graphics_desc **graphics,
+			 const struct xe_media_desc **media,
+			 u32 *graphics_revid,
+			 u32 *media_revid)
+{
+	u32 ver;
+
+	read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid);
+
+	for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) {
+		if (ver == graphics_ip_map[i].ver) {
+			xe->info.graphics_verx100 = ver;
+			*graphics = graphics_ip_map[i].ip;
+
+			break;
+		}
+	}
+
+	if (!xe->info.graphics_verx100) {
+		drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n",
+			ver / 100, ver % 100);
+	}
+
+	read_gmdid(xe, GMDID_MEDIA, &ver, media_revid);
+
+	/* Media may legitimately be fused off / not present */
+	if (ver == 0)
+		return;
+
+	for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) {
+		if (ver == media_ip_map[i].ver) {
+			xe->info.media_verx100 = ver;
+			*media = media_ip_map[i].ip;
+
+			break;
+		}
+	}
+
+	if (!xe->info.media_verx100) {
+		drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n",
+			ver / 100, ver % 100);
+	}
+}
+
+/*
+ * Initialize device info content that only depends on static driver_data
+ * passed to the driver at probe time from PCI ID table.
+ */
+static int xe_info_init_early(struct xe_device *xe,
+			      const struct xe_device_desc *desc,
+			      const struct xe_subplatform_desc *subplatform_desc)
+{
+	int err;
+
+	xe->info.platform = desc->platform;
+	xe->info.subplatform = subplatform_desc ?
+		subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
+
+	xe->info.is_dgfx = desc->is_dgfx;
+	xe->info.has_heci_gscfi = desc->has_heci_gscfi;
+	xe->info.has_llc = desc->has_llc;
+	xe->info.has_mmio_ext = desc->has_mmio_ext;
+	xe->info.has_sriov = desc->has_sriov;
+	xe->info.skip_guc_pc = desc->skip_guc_pc;
+	xe->info.skip_mtcfg = desc->skip_mtcfg;
+	xe->info.skip_pcode = desc->skip_pcode;
+
+	xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
+				  xe_modparam.enable_display &&
+				  desc->has_display;
+
+	err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/*
+ * Initialize device info content that does require knowledge about
+ * graphics / media IP version.
+ * Make sure that GT / tile structures allocated by the driver match the data
+ * present in device info.
+ */
+static int xe_info_init(struct xe_device *xe,
+			const struct xe_graphics_desc *graphics_desc,
+			const struct xe_media_desc *media_desc)
+{
+	u32 graphics_gmdid_revid = 0, media_gmdid_revid = 0;
+	struct xe_tile *tile;
+	struct xe_gt *gt;
+	u8 id;
+
+	/*
+	 * If this platform supports GMD_ID, we'll detect the proper IP
+	 * descriptor to use from hardware registers. desc->graphics will only
+	 * ever be set at this point for platforms before GMD_ID. In that case
+	 * the IP descriptions and versions are simply derived from that.
+	 */
+	if (graphics_desc) {
+		handle_pre_gmdid(xe, graphics_desc, media_desc);
+		xe->info.step = xe_step_pre_gmdid_get(xe);
+	} else {
+		xe_assert(xe, !media_desc);
+		handle_gmdid(xe, &graphics_desc, &media_desc,
+			     &graphics_gmdid_revid, &media_gmdid_revid);
+		xe->info.step = xe_step_gmdid_get(xe,
+						  graphics_gmdid_revid,
+						  media_gmdid_revid);
+	}
+
+	/*
+	 * If we couldn't detect the graphics IP, that's considered a fatal
+	 * error and we should abort driver load.  Failing to detect media
+	 * IP is non-fatal; we'll just proceed without enabling media support.
+	 */
+	if (!graphics_desc)
+		return -ENODEV;
+
+	xe->info.graphics_name = graphics_desc->name;
+	xe->info.media_name = media_desc ? media_desc->name : "none";
+	xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size;
+
+	xe->info.dma_mask_size = graphics_desc->dma_mask_size;
+	xe->info.vram_flags = graphics_desc->vram_flags;
+	xe->info.va_bits = graphics_desc->va_bits;
+	xe->info.vm_max_level = graphics_desc->vm_max_level;
+	xe->info.has_asid = graphics_desc->has_asid;
+	xe->info.has_flat_ccs = graphics_desc->has_flat_ccs;
+	xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
+	xe->info.has_usm = graphics_desc->has_usm;
+
+	/*
+	 * All platforms have at least one primary GT.  Any platform with media
+	 * version 13 or higher has an additional dedicated media GT.  And
+	 * depending on the graphics IP there may be additional "remote tiles."
+	 * All of these together determine the overall GT count.
+	 *
+	 * FIXME: 'tile_count' here is misnamed since the rest of the driver
+	 * treats it as the number of GTs rather than just the number of tiles.
+	 */
+	xe->info.tile_count = 1 + graphics_desc->max_remote_tiles;
+
+	for_each_remote_tile(tile, xe, id) {
+		int err;
+
+		err = xe_tile_init_early(tile, xe, id);
+		if (err)
+			return err;
+	}
+
+	for_each_tile(tile, xe, id) {
+		gt = tile->primary_gt;
+		gt->info.id = xe->info.gt_count++;
+		gt->info.type = XE_GT_TYPE_MAIN;
+		gt->info.__engine_mask = graphics_desc->hw_engine_mask;
+		if (MEDIA_VER(xe) < 13 && media_desc)
+			gt->info.__engine_mask |= media_desc->hw_engine_mask;
+
+		if (MEDIA_VER(xe) < 13 || !media_desc)
+			continue;
+
+		/*
+		 * Allocate and setup media GT for platforms with standalone
+		 * media.
+		 */
+		tile->media_gt = xe_gt_alloc(tile);
+		if (IS_ERR(tile->media_gt))
+			return PTR_ERR(tile->media_gt);
+
+		gt = tile->media_gt;
+		gt->info.type = XE_GT_TYPE_MEDIA;
+		gt->info.__engine_mask = media_desc->hw_engine_mask;
+		gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
+		gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
+
+		/*
+		 * FIXME: At the moment multi-tile and standalone media are
+		 * mutually exclusive on current platforms.  We'll need to
+		 * come up with a better way to number GTs if we ever wind
+		 * up with platforms that support both together.
+		 */
+		drm_WARN_ON(&xe->drm, id != 0);
+		gt->info.id = xe->info.gt_count++;
+	}
+
+	return 0;
+}
+
+static void xe_pci_remove(struct pci_dev *pdev)
+{
+	struct xe_device *xe;
+
+	xe = pci_get_drvdata(pdev);
+	if (!xe) /* driver load aborted, nothing to cleanup */
+		return;
+
+	xe_device_remove(xe);
+	xe_pm_runtime_fini(xe);
+	pci_set_drvdata(pdev, NULL);
+}
+
+static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	const struct xe_device_desc *desc = (const void *)ent->driver_data;
+	const struct xe_subplatform_desc *subplatform_desc;
+	struct xe_device *xe;
+	int err;
+
+	if (desc->require_force_probe && !id_forced(pdev->device)) {
+		dev_info(&pdev->dev,
+			 "Your graphics device %04x is not officially supported\n"
+			 "by xe driver in this kernel version. To force Xe probe,\n"
+			 "use xe.force_probe='%04x' and i915.force_probe='!%04x'\n"
+			 "module parameters or CONFIG_DRM_XE_FORCE_PROBE='%04x' and\n"
+			 "CONFIG_DRM_I915_FORCE_PROBE='!%04x' configuration options.\n",
+			 pdev->device, pdev->device, pdev->device,
+			 pdev->device, pdev->device);
+		return -ENODEV;
+	}
+
+	if (id_blocked(pdev->device)) {
+		dev_info(&pdev->dev, "Probe blocked for device [%04x:%04x].\n",
+			 pdev->vendor, pdev->device);
+		return -ENODEV;
+	}
+
+	if (xe_display_driver_probe_defer(pdev))
+		return -EPROBE_DEFER;
+
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	xe = xe_device_create(pdev, ent);
+	if (IS_ERR(xe))
+		return PTR_ERR(xe);
+
+	pci_set_drvdata(pdev, xe);
+
+	xe_pm_assert_unbounded_bridge(xe);
+	subplatform_desc = find_subplatform(xe, desc);
+
+	pci_set_master(pdev);
+
+	err = xe_info_init_early(xe, desc, subplatform_desc);
+	if (err)
+		return err;
+
+	xe_sriov_probe_early(xe, desc->has_sriov);
+
+	err = xe_device_probe_early(xe);
+	if (err)
+		return err;
+
+	err = xe_info_init(xe, desc->graphics, desc->media);
+	if (err)
+		return err;
+
+	xe_display_probe(xe);
+
+	drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) display:%s dma_m_s:%d tc:%d gscfi:%d",
+		desc->platform_name,
+		subplatform_desc ? subplatform_desc->name : "",
+		xe->info.devid, xe->info.revid,
+		xe->info.is_dgfx,
+		xe->info.graphics_name,
+		xe->info.graphics_verx100 / 100,
+		xe->info.graphics_verx100 % 100,
+		xe->info.media_name,
+		xe->info.media_verx100 / 100,
+		xe->info.media_verx100 % 100,
+		str_yes_no(xe->info.enable_display),
+		xe->info.dma_mask_size, xe->info.tile_count,
+		xe->info.has_heci_gscfi);
+
+	drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n",
+		xe_step_name(xe->info.step.graphics),
+		xe_step_name(xe->info.step.media),
+		xe_step_name(xe->info.step.display),
+		xe_step_name(xe->info.step.basedie));
+
+	drm_dbg(&xe->drm, "SR-IOV support: %s (mode: %s)\n",
+		str_yes_no(xe_device_has_sriov(xe)),
+		xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
+
+	err = xe_device_probe(xe);
+	if (err)
+		return err;
+
+	xe_pm_init(xe);
+
+	drm_dbg(&xe->drm, "d3cold: capable=%s\n",
+		str_yes_no(xe->d3cold.capable));
+
+	return 0;
+}
+
+static void xe_pci_shutdown(struct pci_dev *pdev)
+{
+	xe_device_shutdown(pdev_to_xe_device(pdev));
+}
+
+#ifdef CONFIG_PM_SLEEP
+static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle)
+{
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	struct pci_dev *root_pdev;
+
+	if (!xe->d3cold.capable)
+		return;
+
+	root_pdev = pcie_find_root_port(pdev);
+	if (!root_pdev)
+		return;
+
+	switch (toggle) {
+	case D3COLD_DISABLE:
+		pci_d3cold_disable(root_pdev);
+		break;
+	case D3COLD_ENABLE:
+		pci_d3cold_enable(root_pdev);
+		break;
+	}
+}
+
+static int xe_pci_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int err;
+
+	err = xe_pm_suspend(pdev_to_xe_device(pdev));
+	if (err)
+		return err;
+
+	/*
+	 * Enabling D3Cold is needed for S2Idle/S0ix.
+	 * It is save to allow here since xe_pm_suspend has evicted
+	 * the local memory and the direct complete optimization is disabled.
+	 */
+	d3cold_toggle(pdev, D3COLD_ENABLE);
+
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+
+	return 0;
+}
+
+static int xe_pci_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int err;
+
+	/* Give back the D3Cold decision to the runtime P M*/
+	d3cold_toggle(pdev, D3COLD_DISABLE);
+
+	err = pci_set_power_state(pdev, PCI_D0);
+	if (err)
+		return err;
+
+	err = pci_enable_device(pdev);
+	if (err)
+		return err;
+
+	pci_set_master(pdev);
+
+	err = xe_pm_resume(pdev_to_xe_device(pdev));
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int xe_pci_runtime_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	int err;
+
+	err = xe_pm_runtime_suspend(xe);
+	if (err)
+		return err;
+
+	pci_save_state(pdev);
+
+	if (xe->d3cold.allowed) {
+		d3cold_toggle(pdev, D3COLD_ENABLE);
+		pci_disable_device(pdev);
+		pci_ignore_hotplug(pdev);
+		pci_set_power_state(pdev, PCI_D3cold);
+	} else {
+		d3cold_toggle(pdev, D3COLD_DISABLE);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+
+	return 0;
+}
+
+static int xe_pci_runtime_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	int err;
+
+	err = pci_set_power_state(pdev, PCI_D0);
+	if (err)
+		return err;
+
+	pci_restore_state(pdev);
+
+	if (xe->d3cold.allowed) {
+		err = pci_enable_device(pdev);
+		if (err)
+			return err;
+
+		pci_set_master(pdev);
+	}
+
+	return xe_pm_runtime_resume(xe);
+}
+
+static int xe_pci_runtime_idle(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+
+	xe_pm_d3cold_allowed_toggle(xe);
+
+	return 0;
+}
+
+static const struct dev_pm_ops xe_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(xe_pci_suspend, xe_pci_resume)
+	SET_RUNTIME_PM_OPS(xe_pci_runtime_suspend, xe_pci_runtime_resume, xe_pci_runtime_idle)
+};
+#endif
+
+static struct pci_driver xe_pci_driver = {
+	.name = DRIVER_NAME,
+	.id_table = pciidlist,
+	.probe = xe_pci_probe,
+	.remove = xe_pci_remove,
+	.shutdown = xe_pci_shutdown,
+#ifdef CONFIG_PM_SLEEP
+	.driver.pm = &xe_pm_ops,
+#endif
+};
+
+int xe_register_pci_driver(void)
+{
+	return pci_register_driver(&xe_pci_driver);
+}
+
+void xe_unregister_pci_driver(void)
+{
+	pci_unregister_driver(&xe_pci_driver);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_pci.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
new file mode 100644
index 000000000000..611c1209b14c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_PCI_H_
+#define _XE_PCI_H_
+
+int xe_register_pci_driver(void);
+void xe_unregister_pci_driver(void);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
new file mode 100644
index 000000000000..b1ad12fa22d6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_PCI_TYPES_H_
+#define _XE_PCI_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_graphics_desc {
+	const char *name;
+	u8 ver;
+	u8 rel;
+
+	u8 dma_mask_size;	/* available DMA address bits */
+	u8 va_bits;
+	u8 vm_max_level;
+	u8 vram_flags;
+
+	u64 hw_engine_mask;	/* hardware engines provided by graphics IP */
+
+	u32 tile_mmio_ext_size; /* size of MMIO extension space, per-tile */
+
+	u8 max_remote_tiles:2;
+
+	u8 has_asid:1;
+	u8 has_flat_ccs:1;
+	u8 has_range_tlb_invalidation:1;
+	u8 has_usm:1;
+};
+
+struct xe_media_desc {
+	const char *name;
+	u8 ver;
+	u8 rel;
+
+	u64 hw_engine_mask;	/* hardware engines provided by media IP */
+};
+
+struct gmdid_map {
+	unsigned int ver;
+	const void *ip;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
new file mode 100644
index 000000000000..b324dc2a5deb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_pcode.h"
+
+#include <linux/delay.h>
+#include <linux/errno.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "xe_pcode_api.h"
+
+/**
+ * DOC: PCODE
+ *
+ * Xe PCODE is the component responsible for interfacing with the PCODE
+ * firmware.
+ * It shall provide a very simple ABI to other Xe components, but be the
+ * single and consolidated place that will communicate with PCODE. All read
+ * and write operations to PCODE will be internal and private to this component.
+ *
+ * What's next:
+ * - PCODE hw metrics
+ * - PCODE for display operations
+ */
+
+static int pcode_mailbox_status(struct xe_gt *gt)
+{
+	u32 err;
+	static const struct pcode_err_decode err_decode[] = {
+		[PCODE_ILLEGAL_CMD] = {-ENXIO, "Illegal Command"},
+		[PCODE_TIMEOUT] = {-ETIMEDOUT, "Timed out"},
+		[PCODE_ILLEGAL_DATA] = {-EINVAL, "Illegal Data"},
+		[PCODE_ILLEGAL_SUBCOMMAND] = {-ENXIO, "Illegal Subcommand"},
+		[PCODE_LOCKED] = {-EBUSY, "PCODE Locked"},
+		[PCODE_GT_RATIO_OUT_OF_RANGE] = {-EOVERFLOW,
+			"GT ratio out of range"},
+		[PCODE_REJECTED] = {-EACCES, "PCODE Rejected"},
+		[PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
+	};
+
+	lockdep_assert_held(&gt->pcode.lock);
+
+	err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK;
+	if (err) {
+		drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err,
+			err_decode[err].str ?: "Unknown");
+		return err_decode[err].errno ?: -EPROTO;
+	}
+
+	return 0;
+}
+
+static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+			    unsigned int timeout_ms, bool return_data,
+			    bool atomic)
+{
+	int err;
+
+	if (gt_to_xe(gt)->info.skip_pcode)
+		return 0;
+
+	lockdep_assert_held(&gt->pcode.lock);
+
+	if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0)
+		return -EAGAIN;
+
+	xe_mmio_write32(gt, PCODE_DATA0, *data0);
+	xe_mmio_write32(gt, PCODE_DATA1, data1 ? *data1 : 0);
+	xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox);
+
+	err = xe_mmio_wait32(gt, PCODE_MAILBOX, PCODE_READY, 0,
+			     timeout_ms * 1000, NULL, atomic);
+	if (err)
+		return err;
+
+	if (return_data) {
+		*data0 = xe_mmio_read32(gt, PCODE_DATA0);
+		if (data1)
+			*data1 = xe_mmio_read32(gt, PCODE_DATA1);
+	}
+
+	return pcode_mailbox_status(gt);
+}
+
+int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout)
+{
+	int err;
+
+	mutex_lock(&gt->pcode.lock);
+	err = pcode_mailbox_rw(gt, mbox, &data, NULL, timeout, false, false);
+	mutex_unlock(&gt->pcode.lock);
+
+	return err;
+}
+
+int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1)
+{
+	int err;
+
+	mutex_lock(&gt->pcode.lock);
+	err = pcode_mailbox_rw(gt, mbox, val, val1, 1, true, false);
+	mutex_unlock(&gt->pcode.lock);
+
+	return err;
+}
+
+static int xe_pcode_try_request(struct xe_gt *gt, u32 mbox,
+				u32 request, u32 reply_mask, u32 reply,
+				u32 *status, bool atomic, int timeout_us)
+{
+	int slept, wait = 10;
+
+	for (slept = 0; slept < timeout_us; slept += wait) {
+		*status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
+					   atomic);
+		if ((*status == 0) && ((request & reply_mask) == reply))
+			return 0;
+
+		if (atomic)
+			udelay(wait);
+		else
+			usleep_range(wait, wait << 1);
+		wait <<= 1;
+	}
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * xe_pcode_request - send PCODE request until acknowledgment
+ * @gt: gt
+ * @mbox: PCODE mailbox ID the request is targeted for
+ * @request: request ID
+ * @reply_mask: mask used to check for request acknowledgment
+ * @reply: value used to check for request acknowledgment
+ * @timeout_base_ms: timeout for polling with preemption enabled
+ *
+ * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
+ * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
+ * The request is acknowledged once the PCODE reply dword equals @reply after
+ * applying @reply_mask. Polling is first attempted with preemption enabled
+ * for @timeout_base_ms and if this times out for another 50 ms with
+ * preemption disabled.
+ *
+ * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
+ * other error as reported by PCODE.
+ */
+int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+		      u32 reply_mask, u32 reply, int timeout_base_ms)
+{
+	u32 status;
+	int ret;
+
+	mutex_lock(&gt->pcode.lock);
+
+	ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+				   false, timeout_base_ms * 1000);
+	if (!ret)
+		goto out;
+
+	/*
+	 * The above can time out if the number of requests was low (2 in the
+	 * worst case) _and_ PCODE was busy for some reason even after a
+	 * (queued) request and @timeout_base_ms delay. As a workaround retry
+	 * the poll with preemption disabled to maximize the number of
+	 * requests. Increase the timeout from @timeout_base_ms to 50ms to
+	 * account for interrupts that could reduce the number of these
+	 * requests, and for any quirks of the PCODE firmware that delays
+	 * the request completion.
+	 */
+	drm_err(&gt_to_xe(gt)->drm,
+		"PCODE timeout, retrying with preemption disabled\n");
+	drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1);
+	preempt_disable();
+	ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+				   true, timeout_base_ms * 1000);
+	preempt_enable();
+
+out:
+	mutex_unlock(&gt->pcode.lock);
+	return status ? status : ret;
+}
+/**
+ * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table
+ * @gt: gt instance
+ * @min_gt_freq: Minimal (RPn) GT frequency in units of 50MHz.
+ * @max_gt_freq: Maximal (RP0) GT frequency in units of 50MHz.
+ *
+ * This function initialize PCODE's QOS frequency table for a proper minimal
+ * frequency/power steering decision, depending on the current requested GT
+ * frequency. For older platforms this was a more complete table including
+ * the IA freq. However for the latest platforms this table become a simple
+ * 1-1 Ring vs GT frequency. Even though, without setting it, PCODE might
+ * not take the right decisions for some memory frequencies and affect latency.
+ *
+ * It returns 0 on success, and -ERROR number on failure, -EINVAL if max
+ * frequency is higher then the minimal, and other errors directly translated
+ * from the PCODE Error returs:
+ * - -ENXIO: "Illegal Command"
+ * - -ETIMEDOUT: "Timed out"
+ * - -EINVAL: "Illegal Data"
+ * - -ENXIO, "Illegal Subcommand"
+ * - -EBUSY: "PCODE Locked"
+ * - -EOVERFLOW, "GT ratio out of range"
+ * - -EACCES, "PCODE Rejected"
+ * - -EPROTO, "Unknown"
+ */
+int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+				 u32 max_gt_freq)
+{
+	int ret;
+	u32 freq;
+
+	if (!gt_to_xe(gt)->info.has_llc)
+		return 0;
+
+	if (max_gt_freq <= min_gt_freq)
+		return -EINVAL;
+
+	mutex_lock(&gt->pcode.lock);
+	for (freq = min_gt_freq; freq <= max_gt_freq; freq++) {
+		u32 data = freq << PCODE_FREQ_RING_RATIO_SHIFT | freq;
+
+		ret = pcode_mailbox_rw(gt, PCODE_WRITE_MIN_FREQ_TABLE,
+				       &data, NULL, 1, false, false);
+		if (ret)
+			goto unlock;
+	}
+
+unlock:
+	mutex_unlock(&gt->pcode.lock);
+	return ret;
+}
+
+/**
+ * xe_pcode_init - Ensure PCODE is initialized
+ * @gt: gt instance
+ *
+ * This function ensures that PCODE is properly initialized. To be called during
+ * probe and resume paths.
+ *
+ * It returns 0 on success, and -error number on failure.
+ */
+int xe_pcode_init(struct xe_gt *gt)
+{
+	u32 status, request = DGFX_GET_INIT_STATUS;
+	int timeout_us = 180000000; /* 3 min */
+	int ret;
+
+	if (gt_to_xe(gt)->info.skip_pcode)
+		return 0;
+
+	if (!IS_DGFX(gt_to_xe(gt)))
+		return 0;
+
+	mutex_lock(&gt->pcode.lock);
+	ret = xe_pcode_try_request(gt, DGFX_PCODE_STATUS, request,
+				   DGFX_INIT_STATUS_COMPLETE,
+				   DGFX_INIT_STATUS_COMPLETE,
+				   &status, false, timeout_us);
+	mutex_unlock(&gt->pcode.lock);
+
+	if (ret)
+		drm_err(&gt_to_xe(gt)->drm,
+			"PCODE initialization timedout after: 3 min\n");
+
+	return ret;
+}
+
+/**
+ * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized.
+ * @gt: gt instance
+ *
+ * This function initializes the xe_pcode component, and when needed, it ensures
+ * that PCODE has properly performed its initialization and it is really ready
+ * to go. To be called once only during probe.
+ *
+ * It returns 0 on success, and -error number on failure.
+ */
+int xe_pcode_probe(struct xe_gt *gt)
+{
+	drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->pcode.lock);
+
+	if (gt_to_xe(gt)->info.skip_pcode)
+		return 0;
+
+	if (!IS_DGFX(gt_to_xe(gt)))
+		return 0;
+
+	return xe_pcode_init(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
new file mode 100644
index 000000000000..08cb1d047cba
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PCODE_H_
+#define _XE_PCODE_H_
+
+#include <linux/types.h>
+struct xe_gt;
+
+int xe_pcode_probe(struct xe_gt *gt);
+int xe_pcode_init(struct xe_gt *gt);
+int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+				 u32 max_gt_freq);
+int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1);
+int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val,
+			   int timeout_ms);
+#define xe_pcode_write(gt, mbox, val) \
+	xe_pcode_write_timeout(gt, mbox, val, 1)
+
+int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+		     u32 reply_mask, u32 reply, int timeout_ms);
+
+#define PCODE_MBOX(mbcmd, param1, param2)\
+	(FIELD_PREP(PCODE_MB_COMMAND, mbcmd)\
+	| FIELD_PREP(PCODE_MB_PARAM1, param1)\
+	| FIELD_PREP(PCODE_MB_PARAM2, param2))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
new file mode 100644
index 000000000000..5935cfe30204
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+/* Internal to xe_pcode */
+
+#include "regs/xe_reg_defs.h"
+
+#define PCODE_MAILBOX			XE_REG(0x138124)
+#define   PCODE_READY			REG_BIT(31)
+#define   PCODE_MB_PARAM2		REG_GENMASK(23, 16)
+#define   PCODE_MB_PARAM1		REG_GENMASK(15, 8)
+#define   PCODE_MB_COMMAND		REG_GENMASK(7, 0)
+#define   PCODE_ERROR_MASK		0xFF
+#define     PCODE_SUCCESS		0x0
+#define     PCODE_ILLEGAL_CMD		0x1
+#define     PCODE_TIMEOUT		0x2
+#define     PCODE_ILLEGAL_DATA		0x3
+#define     PCODE_ILLEGAL_SUBCOMMAND	0x4
+#define     PCODE_LOCKED		0x6
+#define     PCODE_GT_RATIO_OUT_OF_RANGE	0x10
+#define     PCODE_REJECTED		0x11
+
+#define PCODE_DATA0			XE_REG(0x138128)
+#define PCODE_DATA1			XE_REG(0x13812C)
+
+/* Min Freq QOS Table */
+#define   PCODE_WRITE_MIN_FREQ_TABLE	0x8
+#define   PCODE_READ_MIN_FREQ_TABLE	0x9
+#define   PCODE_FREQ_RING_RATIO_SHIFT	16
+
+/* PCODE Init */
+#define   DGFX_PCODE_STATUS		0x7E
+#define     DGFX_GET_INIT_STATUS	0x0
+#define     DGFX_INIT_STATUS_COMPLETE	0x1
+
+#define   PCODE_POWER_SETUP			0x7C
+#define     POWER_SETUP_SUBCOMMAND_READ_I1	0x4
+#define     POWER_SETUP_SUBCOMMAND_WRITE_I1	0x5
+#define	    POWER_SETUP_I1_WATTS		REG_BIT(31)
+#define	    POWER_SETUP_I1_SHIFT		6	/* 10.6 fixed point format */
+#define	    POWER_SETUP_I1_DATA_MASK		REG_GENMASK(15, 0)
+
+struct pcode_err_decode {
+	int errno;
+	const char *str;
+};
+
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
new file mode 100644
index 000000000000..553f53dbd093
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PLATFORM_INFO_TYPES_H_
+#define _XE_PLATFORM_INFO_TYPES_H_
+
+/*
+ * Keep this in graphics version based order and chronological order within a
+ * version
+ */
+enum xe_platform {
+	XE_PLATFORM_UNINITIALIZED = 0,
+	XE_TIGERLAKE,
+	XE_ROCKETLAKE,
+	XE_ALDERLAKE_S,
+	XE_ALDERLAKE_P,
+	XE_ALDERLAKE_N,
+	XE_DG1,
+	XE_DG2,
+	XE_PVC,
+	XE_METEORLAKE,
+	XE_LUNARLAKE,
+};
+
+enum xe_subplatform {
+	XE_SUBPLATFORM_UNINITIALIZED = 0,
+	XE_SUBPLATFORM_NONE,
+	XE_SUBPLATFORM_ALDERLAKE_P_RPLU,
+	XE_SUBPLATFORM_ALDERLAKE_S_RPLS,
+	XE_SUBPLATFORM_DG2_G10,
+	XE_SUBPLATFORM_DG2_G11,
+	XE_SUBPLATFORM_DG2_G12,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
new file mode 100644
index 000000000000..b429c2876a76
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_pm.h"
+
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_placement.h>
+
+#include "xe_bo.h"
+#include "xe_bo_evict.h"
+#include "xe_device.h"
+#include "xe_device_sysfs.h"
+#include "xe_display.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_irq.h"
+#include "xe_pcode.h"
+#include "xe_wa.h"
+
+/**
+ * DOC: Xe Power Management
+ *
+ * Xe PM shall be guided by the simplicity.
+ * Use the simplest hook options whenever possible.
+ * Let's not reinvent the runtime_pm references and hooks.
+ * Shall have a clear separation of display and gt underneath this component.
+ *
+ * What's next:
+ *
+ * For now s2idle and s3 are only working in integrated devices. The next step
+ * is to iterate through all VRAM's BO backing them up into the system memory
+ * before allowing the system suspend.
+ *
+ * Also runtime_pm needs to be here from the beginning.
+ *
+ * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC
+ * and no wait boost. Frequency optimizations should come on a next stage.
+ */
+
+/**
+ * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
+ * @xe: xe device instance
+ *
+ * Return: 0 on success
+ */
+int xe_pm_suspend(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	for_each_gt(gt, xe, id)
+		xe_gt_suspend_prepare(gt);
+
+	/* FIXME: Super racey... */
+	err = xe_bo_evict_all(xe);
+	if (err)
+		return err;
+
+	xe_display_pm_suspend(xe);
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_suspend(gt);
+		if (err) {
+			xe_display_pm_resume(xe);
+			return err;
+		}
+	}
+
+	xe_irq_suspend(xe);
+
+	xe_display_pm_suspend_late(xe);
+
+	return 0;
+}
+
+/**
+ * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
+ * @xe: xe device instance
+ *
+ * Return: 0 on success
+ */
+int xe_pm_resume(struct xe_device *xe)
+{
+	struct xe_tile *tile;
+	struct xe_gt *gt;
+	u8 id;
+	int err;
+
+	for_each_tile(tile, xe, id)
+		xe_wa_apply_tile_workarounds(tile);
+
+	for_each_gt(gt, xe, id) {
+		err = xe_pcode_init(gt);
+		if (err)
+			return err;
+	}
+
+	xe_display_pm_resume_early(xe);
+
+	/*
+	 * This only restores pinned memory which is the memory required for the
+	 * GT(s) to resume.
+	 */
+	err = xe_bo_restore_kernel(xe);
+	if (err)
+		return err;
+
+	xe_irq_resume(xe);
+
+	xe_display_pm_resume(xe);
+
+	for_each_gt(gt, xe, id)
+		xe_gt_resume(gt);
+
+	err = xe_bo_restore_user(xe);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev)
+{
+	struct pci_dev *root_pdev;
+
+	root_pdev = pcie_find_root_port(pdev);
+	if (!root_pdev)
+		return false;
+
+	/* D3Cold requires PME capability and _PR3 power resource */
+	if (!pci_pme_capable(root_pdev, PCI_D3cold) || !pci_pr3_present(root_pdev))
+		return false;
+
+	return true;
+}
+
+static void xe_pm_runtime_init(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+
+	/*
+	 * Disable the system suspend direct complete optimization.
+	 * We need to ensure that the regular device suspend/resume functions
+	 * are called since our runtime_pm cannot guarantee local memory
+	 * eviction for d3cold.
+	 * TODO: Check HDA audio dependencies claimed by i915, and then enforce
+	 *       this option to integrated graphics as well.
+	 */
+	if (IS_DGFX(xe))
+		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);
+
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 1000);
+	pm_runtime_set_active(dev);
+	pm_runtime_allow(dev);
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put(dev);
+}
+
+void xe_pm_init(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	/* For now suspend/resume is only allowed with GuC */
+	if (!xe_device_uc_enabled(xe))
+		return;
+
+	drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
+
+	xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev);
+
+	if (xe->d3cold.capable) {
+		xe_device_sysfs_init(xe);
+		xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
+	}
+
+	xe_pm_runtime_init(xe);
+}
+
+void xe_pm_runtime_fini(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+
+	pm_runtime_get_sync(dev);
+	pm_runtime_forbid(dev);
+}
+
+static void xe_pm_write_callback_task(struct xe_device *xe,
+				      struct task_struct *task)
+{
+	WRITE_ONCE(xe->pm_callback_task, task);
+
+	/*
+	 * Just in case it's somehow possible for our writes to be reordered to
+	 * the extent that something else re-uses the task written in
+	 * pm_callback_task. For example after returning from the callback, but
+	 * before the reordered write that resets pm_callback_task back to NULL.
+	 */
+	smp_mb(); /* pairs with xe_pm_read_callback_task */
+}
+
+struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
+{
+	smp_mb(); /* pairs with xe_pm_write_callback_task */
+
+	return READ_ONCE(xe->pm_callback_task);
+}
+
+int xe_pm_runtime_suspend(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err = 0;
+
+	if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe))
+		return -EBUSY;
+
+	/* Disable access_ongoing asserts and prevent recursive pm calls */
+	xe_pm_write_callback_task(xe, current);
+
+	/*
+	 * The actual xe_device_mem_access_put() is always async underneath, so
+	 * exactly where that is called should makes no difference to us. However
+	 * we still need to be very careful with the locks that this callback
+	 * acquires and the locks that are acquired and held by any callers of
+	 * xe_device_mem_access_get(). We already have the matching annotation
+	 * on that side, but we also need it here. For example lockdep should be
+	 * able to tell us if the following scenario is in theory possible:
+	 *
+	 * CPU0                          | CPU1 (kworker)
+	 * lock(A)                       |
+	 *                               | xe_pm_runtime_suspend()
+	 *                               |      lock(A)
+	 * xe_device_mem_access_get()    |
+	 *
+	 * This will clearly deadlock since rpm core needs to wait for
+	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
+	 * on CPU0 which prevents CPU1 making forward progress.  With the
+	 * annotation here and in xe_device_mem_access_get() lockdep will see
+	 * the potential lock inversion and give us a nice splat.
+	 */
+	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+
+	if (xe->d3cold.allowed) {
+		err = xe_bo_evict_all(xe);
+		if (err)
+			goto out;
+	}
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_suspend(gt);
+		if (err)
+			goto out;
+	}
+
+	xe_irq_suspend(xe);
+out:
+	lock_map_release(&xe_device_mem_access_lockdep_map);
+	xe_pm_write_callback_task(xe, NULL);
+	return err;
+}
+
+int xe_pm_runtime_resume(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	u8 id;
+	int err = 0;
+
+	/* Disable access_ongoing asserts and prevent recursive pm calls */
+	xe_pm_write_callback_task(xe, current);
+
+	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+
+	/*
+	 * It can be possible that xe has allowed d3cold but other pcie devices
+	 * in gfx card soc would have blocked d3cold, therefore card has not
+	 * really lost power. Detecting primary Gt power is sufficient.
+	 */
+	gt = xe_device_get_gt(xe, 0);
+	xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);
+
+	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
+		for_each_gt(gt, xe, id) {
+			err = xe_pcode_init(gt);
+			if (err)
+				goto out;
+		}
+
+		/*
+		 * This only restores pinned memory which is the memory
+		 * required for the GT(s) to resume.
+		 */
+		err = xe_bo_restore_kernel(xe);
+		if (err)
+			goto out;
+	}
+
+	xe_irq_resume(xe);
+
+	for_each_gt(gt, xe, id)
+		xe_gt_resume(gt);
+
+	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
+		err = xe_bo_restore_user(xe);
+		if (err)
+			goto out;
+	}
+out:
+	lock_map_release(&xe_device_mem_access_lockdep_map);
+	xe_pm_write_callback_task(xe, NULL);
+	return err;
+}
+
+int xe_pm_runtime_get(struct xe_device *xe)
+{
+	return pm_runtime_get_sync(xe->drm.dev);
+}
+
+int xe_pm_runtime_put(struct xe_device *xe)
+{
+	pm_runtime_mark_last_busy(xe->drm.dev);
+	return pm_runtime_put(xe->drm.dev);
+}
+
+int xe_pm_runtime_get_if_active(struct xe_device *xe)
+{
+	return pm_runtime_get_if_active(xe->drm.dev, true);
+}
+
+void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct pci_dev *bridge = pci_upstream_bridge(pdev);
+
+	if (!bridge)
+		return;
+
+	if (!bridge->driver) {
+		drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM support.\n");
+		device_set_pm_not_required(&pdev->dev);
+	}
+}
+
+int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
+{
+	struct ttm_resource_manager *man;
+	u32 vram_total_mb = 0;
+	int i;
+
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
+		man = ttm_manager_type(&xe->ttm, i);
+		if (man)
+			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
+	}
+
+	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);
+
+	if (threshold > vram_total_mb)
+		return -EINVAL;
+
+	mutex_lock(&xe->d3cold.lock);
+	xe->d3cold.vram_threshold = threshold;
+	mutex_unlock(&xe->d3cold.lock);
+
+	return 0;
+}
+
+void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
+{
+	struct ttm_resource_manager *man;
+	u32 total_vram_used_mb = 0;
+	u64 vram_used;
+	int i;
+
+	if (!xe->d3cold.capable) {
+		xe->d3cold.allowed = false;
+		return;
+	}
+
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
+		man = ttm_manager_type(&xe->ttm, i);
+		if (man) {
+			vram_used = ttm_resource_manager_usage(man);
+			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
+		}
+	}
+
+	mutex_lock(&xe->d3cold.lock);
+
+	if (total_vram_used_mb < xe->d3cold.vram_threshold)
+		xe->d3cold.allowed = true;
+	else
+		xe->d3cold.allowed = false;
+
+	mutex_unlock(&xe->d3cold.lock);
+
+	drm_dbg(&xe->drm,
+		"d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed));
+}
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
new file mode 100644
index 000000000000..6b9031f7af24
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PM_H_
+#define _XE_PM_H_
+
+#include <linux/pm_runtime.h>
+
+/*
+ * TODO: Threshold = 0 will block D3Cold.
+ *       Before we can move this to a higher value (like 300), we need to:
+ *           1. rewrite the VRAM save / restore to avoid buffer object locks
+ */
+#define DEFAULT_VRAM_THRESHOLD 0 /* in MB */
+
+struct xe_device;
+
+int xe_pm_suspend(struct xe_device *xe);
+int xe_pm_resume(struct xe_device *xe);
+
+void xe_pm_init(struct xe_device *xe);
+void xe_pm_runtime_fini(struct xe_device *xe);
+int xe_pm_runtime_suspend(struct xe_device *xe);
+int xe_pm_runtime_resume(struct xe_device *xe);
+int xe_pm_runtime_get(struct xe_device *xe);
+int xe_pm_runtime_put(struct xe_device *xe);
+int xe_pm_runtime_get_if_active(struct xe_device *xe);
+void xe_pm_assert_unbounded_bridge(struct xe_device *xe);
+int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold);
+void xe_pm_d3cold_allowed_toggle(struct xe_device *xe);
+struct task_struct *xe_pm_read_callback_task(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
new file mode 100644
index 000000000000..7bce2a332603
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_preempt_fence.h"
+
+#include <linux/slab.h>
+
+#include "xe_exec_queue.h"
+#include "xe_vm.h"
+
+static void preempt_fence_work_func(struct work_struct *w)
+{
+	bool cookie = dma_fence_begin_signalling();
+	struct xe_preempt_fence *pfence =
+		container_of(w, typeof(*pfence), preempt_work);
+	struct xe_exec_queue *q = pfence->q;
+
+	if (pfence->error)
+		dma_fence_set_error(&pfence->base, pfence->error);
+	else
+		q->ops->suspend_wait(q);
+
+	dma_fence_signal(&pfence->base);
+	dma_fence_end_signalling(cookie);
+
+	xe_vm_queue_rebind_worker(q->vm);
+
+	xe_exec_queue_put(q);
+}
+
+static const char *
+preempt_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "xe";
+}
+
+static const char *
+preempt_fence_get_timeline_name(struct dma_fence *fence)
+{
+	return "preempt";
+}
+
+static bool preempt_fence_enable_signaling(struct dma_fence *fence)
+{
+	struct xe_preempt_fence *pfence =
+		container_of(fence, typeof(*pfence), base);
+	struct xe_exec_queue *q = pfence->q;
+
+	pfence->error = q->ops->suspend(q);
+	queue_work(system_unbound_wq, &pfence->preempt_work);
+	return true;
+}
+
+static const struct dma_fence_ops preempt_fence_ops = {
+	.get_driver_name = preempt_fence_get_driver_name,
+	.get_timeline_name = preempt_fence_get_timeline_name,
+	.enable_signaling = preempt_fence_enable_signaling,
+};
+
+/**
+ * xe_preempt_fence_alloc() - Allocate a preempt fence with minimal
+ * initialization
+ *
+ * Allocate a preempt fence, and initialize its list head.
+ * If the preempt_fence allocated has been armed with
+ * xe_preempt_fence_arm(), it must be freed using dma_fence_put(). If not,
+ * it must be freed using xe_preempt_fence_free().
+ *
+ * Return: A struct xe_preempt_fence pointer used for calling into
+ * xe_preempt_fence_arm() or xe_preempt_fence_free().
+ * An error pointer on error.
+ */
+struct xe_preempt_fence *xe_preempt_fence_alloc(void)
+{
+	struct xe_preempt_fence *pfence;
+
+	pfence = kmalloc(sizeof(*pfence), GFP_KERNEL);
+	if (!pfence)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&pfence->link);
+	INIT_WORK(&pfence->preempt_work, preempt_fence_work_func);
+
+	return pfence;
+}
+
+/**
+ * xe_preempt_fence_free() - Free a preempt fence allocated using
+ * xe_preempt_fence_alloc().
+ * @pfence: pointer obtained from xe_preempt_fence_alloc();
+ *
+ * Free a preempt fence that has not yet been armed.
+ */
+void xe_preempt_fence_free(struct xe_preempt_fence *pfence)
+{
+	list_del(&pfence->link);
+	kfree(pfence);
+}
+
+/**
+ * xe_preempt_fence_arm() - Arm a preempt fence allocated using
+ * xe_preempt_fence_alloc().
+ * @pfence: The struct xe_preempt_fence pointer returned from
+ *          xe_preempt_fence_alloc().
+ * @q: The struct xe_exec_queue used for arming.
+ * @context: The dma-fence context used for arming.
+ * @seqno: The dma-fence seqno used for arming.
+ *
+ * Inserts the preempt fence into @context's timeline, takes @link off any
+ * list, and registers the struct xe_exec_queue as the xe_engine to be preempted.
+ *
+ * Return: A pointer to a struct dma_fence embedded into the preempt fence.
+ * This function doesn't error.
+ */
+struct dma_fence *
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q,
+		     u64 context, u32 seqno)
+{
+	list_del_init(&pfence->link);
+	pfence->q = xe_exec_queue_get(q);
+	dma_fence_init(&pfence->base, &preempt_fence_ops,
+		      &q->compute.lock, context, seqno);
+
+	return &pfence->base;
+}
+
+/**
+ * xe_preempt_fence_create() - Helper to create and arm a preempt fence.
+ * @q: The struct xe_exec_queue used for arming.
+ * @context: The dma-fence context used for arming.
+ * @seqno: The dma-fence seqno used for arming.
+ *
+ * Allocates and inserts the preempt fence into @context's timeline,
+ * and registers @e as the struct xe_exec_queue to be preempted.
+ *
+ * Return: A pointer to the resulting struct dma_fence on success. An error
+ * pointer on error. In particular if allocation fails it returns
+ * ERR_PTR(-ENOMEM);
+ */
+struct dma_fence *
+xe_preempt_fence_create(struct xe_exec_queue *q,
+			u64 context, u32 seqno)
+{
+	struct xe_preempt_fence *pfence;
+
+	pfence = xe_preempt_fence_alloc();
+	if (IS_ERR(pfence))
+		return ERR_CAST(pfence);
+
+	return xe_preempt_fence_arm(pfence, q, context, seqno);
+}
+
+bool xe_fence_is_xe_preempt(const struct dma_fence *fence)
+{
+	return fence->ops == &preempt_fence_ops;
+}
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.h b/drivers/gpu/drm/xe/xe_preempt_fence.h
new file mode 100644
index 000000000000..9406c6fea525
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PREEMPT_FENCE_H_
+#define _XE_PREEMPT_FENCE_H_
+
+#include "xe_preempt_fence_types.h"
+
+struct list_head;
+
+struct dma_fence *
+xe_preempt_fence_create(struct xe_exec_queue *q,
+			u64 context, u32 seqno);
+
+struct xe_preempt_fence *xe_preempt_fence_alloc(void);
+
+void xe_preempt_fence_free(struct xe_preempt_fence *pfence);
+
+struct dma_fence *
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q,
+		     u64 context, u32 seqno);
+
+static inline struct xe_preempt_fence *
+to_preempt_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct xe_preempt_fence, base);
+}
+
+/**
+ * xe_preempt_fence_link() - Return a link used to keep unarmed preempt
+ * fences on a list.
+ * @pfence: Pointer to the preempt fence.
+ *
+ * The link is embedded in the struct xe_preempt_fence. Use
+ * link_to_preempt_fence() to convert back to the preempt fence.
+ *
+ * Return: A pointer to an embedded struct list_head.
+ */
+static inline struct list_head *
+xe_preempt_fence_link(struct xe_preempt_fence *pfence)
+{
+	return &pfence->link;
+}
+
+/**
+ * to_preempt_fence_from_link() - Convert back to a preempt fence pointer
+ * from a link obtained with xe_preempt_fence_link().
+ * @link: The struct list_head obtained from xe_preempt_fence_link().
+ *
+ * Return: A pointer to the embedding struct xe_preempt_fence.
+ */
+static inline struct xe_preempt_fence *
+to_preempt_fence_from_link(struct list_head *link)
+{
+	return container_of(link, struct xe_preempt_fence, link);
+}
+
+bool xe_fence_is_xe_preempt(const struct dma_fence *fence);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
new file mode 100644
index 000000000000..b54b5c29b533
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PREEMPT_FENCE_TYPES_H_
+#define _XE_PREEMPT_FENCE_TYPES_H_
+
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+
+struct xe_exec_queue;
+
+/**
+ * struct xe_preempt_fence - XE preempt fence
+ *
+ * hardware and triggers a callback once the xe_engine is complete.
+ */
+struct xe_preempt_fence {
+	/** @base: dma fence base */
+	struct dma_fence base;
+	/** @link: link into list of pending preempt fences */
+	struct list_head link;
+	/** @q: exec queue for this preempt fence */
+	struct xe_exec_queue *q;
+	/** @preempt_work: work struct which issues preemption */
+	struct work_struct preempt_work;
+	/** @error: preempt fence is in error state */
+	int error;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
new file mode 100644
index 000000000000..de1030a47588
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -0,0 +1,1653 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_pt.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_drm_client.h"
+#include "xe_gt.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_migrate.h"
+#include "xe_pt_types.h"
+#include "xe_pt_walk.h"
+#include "xe_res_cursor.h"
+#include "xe_trace.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_vm.h"
+
+struct xe_pt_dir {
+	struct xe_pt pt;
+	/** @dir: Directory structure for the xe_pt_walk functionality */
+	struct xe_ptw_dir dir;
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr))
+#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr)
+#else
+#define xe_pt_set_addr(__xe_pt, __addr)
+#define xe_pt_addr(__xe_pt) 0ull
+#endif
+
+static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48};
+static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48};
+
+#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1)
+
+static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
+{
+	return container_of(pt, struct xe_pt_dir, pt);
+}
+
+static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
+{
+	return container_of(pt_dir->dir.entries[index], struct xe_pt, base);
+}
+
+static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
+			     unsigned int level)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
+	u8 id = tile->id;
+
+	if (!xe_vm_has_scratch(vm))
+		return 0;
+
+	if (level > MAX_HUGEPTE_LEVEL)
+		return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
+						 0, pat_index);
+
+	return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) |
+		XE_PTE_NULL;
+}
+
+/**
+ * xe_pt_create() - Create a page-table.
+ * @vm: The vm to create for.
+ * @tile: The tile to create for.
+ * @level: The page-table level.
+ *
+ * Allocate and initialize a single struct xe_pt metadata structure. Also
+ * create the corresponding page-table bo, but don't initialize it. If the
+ * level is grater than zero, then it's assumed to be a directory page-
+ * table and the directory structure is also allocated and initialized to
+ * NULL pointers.
+ *
+ * Return: A valid struct xe_pt pointer on success, Pointer error code on
+ * error.
+ */
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
+			   unsigned int level)
+{
+	struct xe_pt *pt;
+	struct xe_bo *bo;
+	size_t size;
+	int err;
+
+	size = !level ?  sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) +
+		XE_PDES * sizeof(struct xe_ptw *);
+	pt = kzalloc(size, GFP_KERNEL);
+	if (!pt)
+		return ERR_PTR(-ENOMEM);
+
+	bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
+				  XE_BO_CREATE_PINNED_BIT |
+				  XE_BO_CREATE_NO_RESV_EVICT |
+				  XE_BO_PAGETABLE);
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		goto err_kfree;
+	}
+	pt->bo = bo;
+	pt->level = level;
+	pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL;
+
+	if (vm->xef)
+		xe_drm_client_add_bo(vm->xef->client, pt->bo);
+	xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL);
+
+	return pt;
+
+err_kfree:
+	kfree(pt);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
+ * entries.
+ * @tile: The tile the scratch pagetable of which to use.
+ * @vm: The vm we populate for.
+ * @pt: The pagetable the bo of which to initialize.
+ *
+ * Populate the page-table bo of @pt with entries pointing into the tile's
+ * scratch page-table tree if any. Otherwise populate with zeros.
+ */
+void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
+			  struct xe_pt *pt)
+{
+	struct iosys_map *map = &pt->bo->vmap;
+	u64 empty;
+	int i;
+
+	if (!xe_vm_has_scratch(vm)) {
+		/*
+		 * FIXME: Some memory is allocated already allocated to zero?
+		 * Find out which memory that is and avoid this memset...
+		 */
+		xe_map_memset(vm->xe, map, 0, 0, SZ_4K);
+	} else {
+		empty = __xe_pt_empty_pte(tile, vm, pt->level);
+		for (i = 0; i < XE_PDES; i++)
+			xe_pt_write(vm->xe, map, i, empty);
+	}
+}
+
+/**
+ * xe_pt_shift() - Return the ilog2 value of the size of the address range of
+ * a page-table at a certain level.
+ * @level: The level.
+ *
+ * Return: The ilog2 value of the size of the address range of a page-table
+ * at level @level.
+ */
+unsigned int xe_pt_shift(unsigned int level)
+{
+	return XE_PTE_SHIFT + XE_PDE_SHIFT * level;
+}
+
+/**
+ * xe_pt_destroy() - Destroy a page-table tree.
+ * @pt: The root of the page-table tree to destroy.
+ * @flags: vm flags. Currently unused.
+ * @deferred: List head of lockless list for deferred putting. NULL for
+ *            immediate putting.
+ *
+ * Puts the page-table bo, recursively calls xe_pt_destroy on all children
+ * and finally frees @pt. TODO: Can we remove the @flags argument?
+ */
+void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
+{
+	int i;
+
+	if (!pt)
+		return;
+
+	XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list));
+	xe_bo_unpin(pt->bo);
+	xe_bo_put_deferred(pt->bo, deferred);
+
+	if (pt->level > 0 && pt->num_live) {
+		struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
+
+		for (i = 0; i < XE_PDES; i++) {
+			if (xe_pt_entry(pt_dir, i))
+				xe_pt_destroy(xe_pt_entry(pt_dir, i), flags,
+					      deferred);
+		}
+	}
+	kfree(pt);
+}
+
+/**
+ * DOC: Pagetable building
+ *
+ * Below we use the term "page-table" for both page-directories, containing
+ * pointers to lower level page-directories or page-tables, and level 0
+ * page-tables that contain only page-table-entries pointing to memory pages.
+ *
+ * When inserting an address range in an already existing page-table tree
+ * there will typically be a set of page-tables that are shared with other
+ * address ranges, and a set that are private to this address range.
+ * The set of shared page-tables can be at most two per level,
+ * and those can't be updated immediately because the entries of those
+ * page-tables may still be in use by the gpu for other mappings. Therefore
+ * when inserting entries into those, we instead stage those insertions by
+ * adding insertion data into struct xe_vm_pgtable_update structures. This
+ * data, (subtrees for the cpu and page-table-entries for the gpu) is then
+ * added in a separate commit step. CPU-data is committed while still under the
+ * vm lock, the object lock and for userptr, the notifier lock in read mode.
+ * The GPU async data is committed either by the GPU or CPU after fulfilling
+ * relevant dependencies.
+ * For non-shared page-tables (and, in fact, for shared ones that aren't
+ * existing at the time of staging), we add the data in-place without the
+ * special update structures. This private part of the page-table tree will
+ * remain disconnected from the vm page-table tree until data is committed to
+ * the shared page tables of the vm tree in the commit phase.
+ */
+
+struct xe_pt_update {
+	/** @update: The update structure we're building for this parent. */
+	struct xe_vm_pgtable_update *update;
+	/** @parent: The parent. Used to detect a parent change. */
+	struct xe_pt *parent;
+	/** @preexisting: Whether the parent was pre-existing or allocated */
+	bool preexisting;
+};
+
+struct xe_pt_stage_bind_walk {
+	/** base: The base class. */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @vm: The vm we're building for. */
+	struct xe_vm *vm;
+	/** @tile: The tile we're building for. */
+	struct xe_tile *tile;
+	/** @default_pte: PTE flag only template. No address is associated */
+	u64 default_pte;
+	/** @dma_offset: DMA offset to add to the PTE. */
+	u64 dma_offset;
+	/**
+	 * @needs_64k: This address range enforces 64K alignment and
+	 * granularity.
+	 */
+	bool needs_64K;
+	/**
+	 * @vma: VMA being mapped
+	 */
+	struct xe_vma *vma;
+
+	/* Also input, but is updated during the walk*/
+	/** @curs: The DMA address cursor. */
+	struct xe_res_cursor *curs;
+	/** @va_curs_start: The Virtual address coresponding to @curs->start */
+	u64 va_curs_start;
+
+	/* Output */
+	struct xe_walk_update {
+		/** @wupd.entries: Caller provided storage. */
+		struct xe_vm_pgtable_update *entries;
+		/** @wupd.num_used_entries: Number of update @entries used. */
+		unsigned int num_used_entries;
+		/** @wupd.updates: Tracks the update entry at a given level */
+		struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1];
+	} wupd;
+
+	/* Walk state */
+	/**
+	 * @l0_end_addr: The end address of the current l0 leaf. Used for
+	 * 64K granularity detection.
+	 */
+	u64 l0_end_addr;
+	/** @addr_64K: The start address of the current 64K chunk. */
+	u64 addr_64K;
+	/** @found_64: Whether @add_64K actually points to a 64K chunk. */
+	bool found_64K;
+};
+
+static int
+xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent,
+		 pgoff_t offset, bool alloc_entries)
+{
+	struct xe_pt_update *upd = &wupd->updates[parent->level];
+	struct xe_vm_pgtable_update *entry;
+
+	/*
+	 * For *each level*, we could only have one active
+	 * struct xt_pt_update at any one time. Once we move on to a
+	 * new parent and page-directory, the old one is complete, and
+	 * updates are either already stored in the build tree or in
+	 * @wupd->entries
+	 */
+	if (likely(upd->parent == parent))
+		return 0;
+
+	upd->parent = parent;
+	upd->preexisting = true;
+
+	if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1)
+		return -EINVAL;
+
+	entry = wupd->entries + wupd->num_used_entries++;
+	upd->update = entry;
+	entry->ofs = offset;
+	entry->pt_bo = parent->bo;
+	entry->pt = parent;
+	entry->flags = 0;
+	entry->qwords = 0;
+
+	if (alloc_entries) {
+		entry->pt_entries = kmalloc_array(XE_PDES,
+						  sizeof(*entry->pt_entries),
+						  GFP_KERNEL);
+		if (!entry->pt_entries)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * NOTE: This is a very frequently called function so we allow ourselves
+ * to annotate (using branch prediction hints) the fastpath of updating a
+ * non-pre-existing pagetable with leaf ptes.
+ */
+static int
+xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent,
+		   pgoff_t offset, struct xe_pt *xe_child, u64 pte)
+{
+	struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level];
+	struct xe_pt_update *child_upd = xe_child ?
+		&xe_walk->wupd.updates[xe_child->level] : NULL;
+	int ret;
+
+	ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true);
+	if (unlikely(ret))
+		return ret;
+
+	/*
+	 * Register this new pagetable so that it won't be recognized as
+	 * a shared pagetable by a subsequent insertion.
+	 */
+	if (unlikely(child_upd)) {
+		child_upd->update = NULL;
+		child_upd->parent = xe_child;
+		child_upd->preexisting = false;
+	}
+
+	if (likely(!upd->preexisting)) {
+		/* Continue building a non-connected subtree. */
+		struct iosys_map *map = &parent->bo->vmap;
+
+		if (unlikely(xe_child))
+			parent->base.dir->entries[offset] = &xe_child->base;
+
+		xe_pt_write(xe_walk->vm->xe, map, offset, pte);
+		parent->num_live++;
+	} else {
+		/* Shared pt. Stage update. */
+		unsigned int idx;
+		struct xe_vm_pgtable_update *entry = upd->update;
+
+		idx = offset - entry->ofs;
+		entry->pt_entries[idx].pt = xe_child;
+		entry->pt_entries[idx].pte = pte;
+		entry->qwords++;
+	}
+
+	return 0;
+}
+
+static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
+				   struct xe_pt_stage_bind_walk *xe_walk)
+{
+	u64 size, dma;
+
+	if (level > MAX_HUGEPTE_LEVEL)
+		return false;
+
+	/* Does the virtual range requested cover a huge pte? */
+	if (!xe_pt_covers(addr, next, level, &xe_walk->base))
+		return false;
+
+	/* Does the DMA segment cover the whole pte? */
+	if (next - xe_walk->va_curs_start > xe_walk->curs->size)
+		return false;
+
+	/* null VMA's do not have dma addresses */
+	if (xe_vma_is_null(xe_walk->vma))
+		return true;
+
+	/* Is the DMA address huge PTE size aligned? */
+	size = next - addr;
+	dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs);
+
+	return IS_ALIGNED(dma, size);
+}
+
+/*
+ * Scan the requested mapping to check whether it can be done entirely
+ * with 64K PTEs.
+ */
+static bool
+xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+	struct xe_res_cursor curs = *xe_walk->curs;
+
+	if (!IS_ALIGNED(addr, SZ_64K))
+		return false;
+
+	if (next > xe_walk->l0_end_addr)
+		return false;
+
+	/* null VMA's do not have dma addresses */
+	if (xe_vma_is_null(xe_walk->vma))
+		return true;
+
+	xe_res_next(&curs, addr - xe_walk->va_curs_start);
+	for (; addr < next; addr += SZ_64K) {
+		if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K)
+			return false;
+
+		xe_res_next(&curs, SZ_64K);
+	}
+
+	return addr == next;
+}
+
+/*
+ * For non-compact "normal" 4K level-0 pagetables, we want to try to group
+ * addresses together in 64K-contigous regions to add a 64K TLB hint for the
+ * device to the PTE.
+ * This function determines whether the address is part of such a
+ * segment. For VRAM in normal pagetables, this is strictly necessary on
+ * some devices.
+ */
+static bool
+xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+	/* Address is within an already found 64k region */
+	if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K)
+		return true;
+
+	xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk);
+	xe_walk->addr_64K = addr;
+
+	return xe_walk->found_64K;
+}
+
+static int
+xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
+		       unsigned int level, u64 addr, u64 next,
+		       struct xe_ptw **child,
+		       enum page_walk_action *action,
+		       struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_bind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	u16 pat_index = xe_walk->vma->pat_index;
+	struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
+	struct xe_vm *vm = xe_walk->vm;
+	struct xe_pt *xe_child;
+	bool covers;
+	int ret = 0;
+	u64 pte;
+
+	/* Is this a leaf entry ?*/
+	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
+		struct xe_res_cursor *curs = xe_walk->curs;
+		bool is_null = xe_vma_is_null(xe_walk->vma);
+
+		XE_WARN_ON(xe_walk->va_curs_start != addr);
+
+		pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
+						 xe_res_dma(curs) + xe_walk->dma_offset,
+						 xe_walk->vma, pat_index, level);
+		pte |= xe_walk->default_pte;
+
+		/*
+		 * Set the XE_PTE_PS64 hint if possible, otherwise if
+		 * this device *requires* 64K PTE size for VRAM, fail.
+		 */
+		if (level == 0 && !xe_parent->is_compact) {
+			if (xe_pt_is_pte_ps64K(addr, next, xe_walk))
+				pte |= XE_PTE_PS64;
+			else if (XE_WARN_ON(xe_walk->needs_64K))
+				return -EINVAL;
+		}
+
+		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte);
+		if (unlikely(ret))
+			return ret;
+
+		if (!is_null)
+			xe_res_next(curs, next - addr);
+		xe_walk->va_curs_start = next;
+		xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level);
+		*action = ACTION_CONTINUE;
+
+		return ret;
+	}
+
+	/*
+	 * Descending to lower level. Determine if we need to allocate a
+	 * new page table or -directory, which we do if there is no
+	 * previous one or there is one we can completely replace.
+	 */
+	if (level == 1) {
+		walk->shifts = xe_normal_pt_shifts;
+		xe_walk->l0_end_addr = next;
+	}
+
+	covers = xe_pt_covers(addr, next, level, &xe_walk->base);
+	if (covers || !*child) {
+		u64 flags = 0;
+
+		xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1);
+		if (IS_ERR(xe_child))
+			return PTR_ERR(xe_child);
+
+		xe_pt_set_addr(xe_child,
+			       round_down(addr, 1ull << walk->shifts[level]));
+
+		if (!covers)
+			xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child);
+
+		*child = &xe_child->base;
+
+		/*
+		 * Prefer the compact pagetable layout for L0 if possible.
+		 * TODO: Suballocate the pt bo to avoid wasting a lot of
+		 * memory.
+		 */
+		if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 &&
+		    covers && xe_pt_scan_64K(addr, next, xe_walk)) {
+			walk->shifts = xe_compact_pt_shifts;
+			flags |= XE_PDE_64K;
+			xe_child->is_compact = true;
+		}
+
+		pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags;
+		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
+					 pte);
+	}
+
+	*action = ACTION_SUBTREE;
+	return ret;
+}
+
+static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
+	.pt_entry = xe_pt_stage_bind_entry,
+};
+
+/**
+ * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
+ * range.
+ * @tile: The tile we're building for.
+ * @vma: The vma indicating the address range.
+ * @entries: Storage for the update entries used for connecting the tree to
+ * the main tree at commit time.
+ * @num_entries: On output contains the number of @entries used.
+ *
+ * This function builds a disconnected page-table tree for a given address
+ * range. The tree is connected to the main vm tree for the gpu using
+ * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind().
+ * The function builds xe_vm_pgtable_update structures for already existing
+ * shared page-tables, and non-existing shared and non-shared page-tables
+ * are built and populated directly.
+ *
+ * Return 0 on success, negative error code on error.
+ */
+static int
+xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
+		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_bo *bo = xe_vma_bo(vma);
+	bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
+		(xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
+	struct xe_res_cursor curs;
+	struct xe_pt_stage_bind_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_stage_bind_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.vm = xe_vma_vm(vma),
+		.tile = tile,
+		.curs = &curs,
+		.va_curs_start = xe_vma_start(vma),
+		.vma = vma,
+		.wupd.entries = entries,
+		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem,
+	};
+	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+	int ret;
+
+	if (vma && (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) &&
+	    (is_devmem || !IS_DGFX(xe)))
+		xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
+
+	if (is_devmem) {
+		xe_walk.default_pte |= XE_PPGTT_PTE_DM;
+		xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
+	}
+
+	if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
+		xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
+
+	xe_bo_assert_held(bo);
+
+	if (!xe_vma_is_null(vma)) {
+		if (xe_vma_is_userptr(vma))
+			xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma),
+					&curs);
+		else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
+			xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
+				     xe_vma_size(vma), &curs);
+		else
+			xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma),
+					xe_vma_size(vma), &curs);
+	} else {
+		curs.size = xe_vma_size(vma);
+	}
+
+	ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma),
+			       xe_vma_end(vma), &xe_walk.base);
+
+	*num_entries = xe_walk.wupd.num_used_entries;
+	return ret;
+}
+
+/**
+ * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a
+ * shared pagetable.
+ * @addr: The start address within the non-shared pagetable.
+ * @end: The end address within the non-shared pagetable.
+ * @level: The level of the non-shared pagetable.
+ * @walk: Walk info. The function adjusts the walk action.
+ * @action: next action to perform (see enum page_walk_action)
+ * @offset: Ignored on input, First non-shared entry on output.
+ * @end_offset: Ignored on input, Last non-shared entry + 1 on output.
+ *
+ * A non-shared page-table has some entries that belong to the address range
+ * and others that don't. This function determines the entries that belong
+ * fully to the address range. Depending on level, some entries may
+ * partially belong to the address range (that can't happen at level 0).
+ * The function detects that and adjust those offsets to not include those
+ * partial entries. Iff it does detect partial entries, we know that there must
+ * be shared page tables also at lower levels, so it adjusts the walk action
+ * accordingly.
+ *
+ * Return: true if there were non-shared entries, false otherwise.
+ */
+static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level,
+				    struct xe_pt_walk *walk,
+				    enum page_walk_action *action,
+				    pgoff_t *offset, pgoff_t *end_offset)
+{
+	u64 size = 1ull << walk->shifts[level];
+
+	*offset = xe_pt_offset(addr, level, walk);
+	*end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset;
+
+	if (!level)
+		return true;
+
+	/*
+	 * If addr or next are not size aligned, there are shared pts at lower
+	 * level, so in that case traverse down the subtree
+	 */
+	*action = ACTION_CONTINUE;
+	if (!IS_ALIGNED(addr, size)) {
+		*action = ACTION_SUBTREE;
+		(*offset)++;
+	}
+
+	if (!IS_ALIGNED(end, size)) {
+		*action = ACTION_SUBTREE;
+		(*end_offset)--;
+	}
+
+	return *end_offset > *offset;
+}
+
+struct xe_pt_zap_ptes_walk {
+	/** @base: The walk base-class */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @tile: The tile we're building for */
+	struct xe_tile *tile;
+
+	/* Output */
+	/** @needs_invalidate: Whether we need to invalidate TLB*/
+	bool needs_invalidate;
+};
+
+static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
+				unsigned int level, u64 addr, u64 next,
+				struct xe_ptw **child,
+				enum page_walk_action *action,
+				struct xe_pt_walk *walk)
+{
+	struct xe_pt_zap_ptes_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+	pgoff_t end_offset;
+
+	XE_WARN_ON(!*child);
+	XE_WARN_ON(!level && xe_child->is_compact);
+
+	/*
+	 * Note that we're called from an entry callback, and we're dealing
+	 * with the child of that entry rather than the parent, so need to
+	 * adjust level down.
+	 */
+	if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset,
+				    &end_offset)) {
+		xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap,
+			      offset * sizeof(u64), 0,
+			      (end_offset - offset) * sizeof(u64));
+		xe_walk->needs_invalidate = true;
+	}
+
+	return 0;
+}
+
+static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {
+	.pt_entry = xe_pt_zap_ptes_entry,
+};
+
+/**
+ * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
+ * @tile: The tile we're zapping for.
+ * @vma: GPU VMA detailing address range.
+ *
+ * Eviction and Userptr invalidation needs to be able to zap the
+ * gpu ptes of a given address range in pagefaulting mode.
+ * In order to be able to do that, that function needs access to the shared
+ * page-table entrieaso it can either clear the leaf PTEs or
+ * clear the pointers to lower-level page-tables. The caller is required
+ * to hold the necessary locks to ensure neither the page-table connectivity
+ * nor the page-table entries of the range is updated from under us.
+ *
+ * Return: Whether ptes were actually updated and a TLB invalidation is
+ * required.
+ */
+bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
+{
+	struct xe_pt_zap_ptes_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_zap_ptes_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.tile = tile,
+	};
+	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+
+	if (!(vma->tile_present & BIT(tile->id)))
+		return false;
+
+	(void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma),
+				xe_vma_end(vma), &xe_walk.base);
+
+	return xe_walk.needs_invalidate;
+}
+
+static void
+xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile,
+		       struct iosys_map *map, void *data,
+		       u32 qword_ofs, u32 num_qwords,
+		       const struct xe_vm_pgtable_update *update)
+{
+	struct xe_pt_entry *ptes = update->pt_entries;
+	u64 *ptr = data;
+	u32 i;
+
+	for (i = 0; i < num_qwords; i++) {
+		if (map)
+			xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
+				  sizeof(u64), u64, ptes[i].pte);
+		else
+			ptr[i] = ptes[i].pte;
+	}
+}
+
+static void xe_pt_abort_bind(struct xe_vma *vma,
+			     struct xe_vm_pgtable_update *entries,
+			     u32 num_entries)
+{
+	u32 i, j;
+
+	for (i = 0; i < num_entries; i++) {
+		if (!entries[i].pt_entries)
+			continue;
+
+		for (j = 0; j < entries[i].qwords; j++)
+			xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL);
+		kfree(entries[i].pt_entries);
+	}
+}
+
+static void xe_pt_commit_locks_assert(struct xe_vma *vma)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+
+	lockdep_assert_held(&vm->lock);
+
+	if (xe_vma_is_userptr(vma))
+		lockdep_assert_held_read(&vm->userptr.notifier_lock);
+	else if (!xe_vma_is_null(vma))
+		dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv);
+
+	xe_vm_assert_held(vm);
+}
+
+static void xe_pt_commit_bind(struct xe_vma *vma,
+			      struct xe_vm_pgtable_update *entries,
+			      u32 num_entries, bool rebind,
+			      struct llist_head *deferred)
+{
+	u32 i, j;
+
+	xe_pt_commit_locks_assert(vma);
+
+	for (i = 0; i < num_entries; i++) {
+		struct xe_pt *pt = entries[i].pt;
+		struct xe_pt_dir *pt_dir;
+
+		if (!rebind)
+			pt->num_live += entries[i].qwords;
+
+		if (!pt->level) {
+			kfree(entries[i].pt_entries);
+			continue;
+		}
+
+		pt_dir = as_xe_pt_dir(pt);
+		for (j = 0; j < entries[i].qwords; j++) {
+			u32 j_ = j + entries[i].ofs;
+			struct xe_pt *newpte = entries[i].pt_entries[j].pt;
+
+			if (xe_pt_entry(pt_dir, j_))
+				xe_pt_destroy(xe_pt_entry(pt_dir, j_),
+					      xe_vma_vm(vma)->flags, deferred);
+
+			pt_dir->dir.entries[j_] = &newpte->base;
+		}
+		kfree(entries[i].pt_entries);
+	}
+}
+
+static int
+xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
+		   struct xe_vm_pgtable_update *entries, u32 *num_entries,
+		   bool rebind)
+{
+	int err;
+
+	*num_entries = 0;
+	err = xe_pt_stage_bind(tile, vma, entries, num_entries);
+	if (!err)
+		xe_tile_assert(tile, *num_entries);
+	else /* abort! */
+		xe_pt_abort_bind(vma, entries, *num_entries);
+
+	return err;
+}
+
+static void xe_vm_dbg_print_entries(struct xe_device *xe,
+				    const struct xe_vm_pgtable_update *entries,
+				    unsigned int num_entries)
+#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
+{
+	unsigned int i;
+
+	vm_dbg(&xe->drm, "%u entries to update\n", num_entries);
+	for (i = 0; i < num_entries; i++) {
+		const struct xe_vm_pgtable_update *entry = &entries[i];
+		struct xe_pt *xe_pt = entry->pt;
+		u64 page_size = 1ull << xe_pt_shift(xe_pt->level);
+		u64 end;
+		u64 start;
+
+		xe_assert(xe, !entry->pt->is_compact);
+		start = entry->ofs * page_size;
+		end = start + page_size * entry->qwords;
+		vm_dbg(&xe->drm,
+		       "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n",
+		       i, xe_pt->level, entry->ofs, entry->qwords,
+		       xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0);
+	}
+}
+#else
+{}
+#endif
+
+#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
+
+static int xe_pt_userptr_inject_eagain(struct xe_vma *vma)
+{
+	u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2;
+	static u32 count;
+
+	if (count++ % divisor == divisor - 1) {
+		struct xe_vm *vm = xe_vma_vm(vma);
+
+		vma->userptr.divisor = divisor << 1;
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_move_tail(&vma->userptr.invalidate_link,
+			       &vm->userptr.invalidated);
+		spin_unlock(&vm->userptr.invalidated_lock);
+		return true;
+	}
+
+	return false;
+}
+
+#else
+
+static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma)
+{
+	return false;
+}
+
+#endif
+
+/**
+ * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks
+ * @base: Base we derive from.
+ * @bind: Whether this is a bind or an unbind operation. A bind operation
+ *        makes the pre-commit callback error with -EAGAIN if it detects a
+ *        pending invalidation.
+ * @locked: Whether the pre-commit callback locked the userptr notifier lock
+ *          and it needs unlocking.
+ */
+struct xe_pt_migrate_pt_update {
+	struct xe_migrate_pt_update base;
+	bool bind;
+	bool locked;
+};
+
+/*
+ * This function adds the needed dependencies to a page-table update job
+ * to make sure racing jobs for separate bind engines don't race writing
+ * to the same page-table range, wreaking havoc. Initially use a single
+ * fence for the entire VM. An optimization would use smaller granularity.
+ */
+static int xe_pt_vm_dependencies(struct xe_sched_job *job,
+				 struct xe_range_fence_tree *rftree,
+				 u64 start, u64 last)
+{
+	struct xe_range_fence *rtfence;
+	struct dma_fence *fence;
+	int err;
+
+	rtfence = xe_range_fence_tree_first(rftree, start, last);
+	while (rtfence) {
+		fence = rtfence->fence;
+
+		if (!dma_fence_is_signaled(fence)) {
+			/*
+			 * Is this a CPU update? GPU is busy updating, so return
+			 * an error
+			 */
+			if (!job)
+				return -ETIME;
+
+			dma_fence_get(fence);
+			err = drm_sched_job_add_dependency(&job->drm, fence);
+			if (err)
+				return err;
+		}
+
+		rtfence = xe_range_fence_tree_next(rtfence, start, last);
+	}
+
+	return 0;
+}
+
+static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+	struct xe_range_fence_tree *rftree =
+		&xe_vma_vm(pt_update->vma)->rftree[pt_update->tile_id];
+
+	return xe_pt_vm_dependencies(pt_update->job, rftree,
+				     pt_update->start, pt_update->last);
+}
+
+static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+	struct xe_pt_migrate_pt_update *userptr_update =
+		container_of(pt_update, typeof(*userptr_update), base);
+	struct xe_vma *vma = pt_update->vma;
+	unsigned long notifier_seq = vma->userptr.notifier_seq;
+	struct xe_vm *vm = xe_vma_vm(vma);
+	int err = xe_pt_vm_dependencies(pt_update->job,
+					&vm->rftree[pt_update->tile_id],
+					pt_update->start,
+					pt_update->last);
+
+	if (err)
+		return err;
+
+	userptr_update->locked = false;
+
+	/*
+	 * Wait until nobody is running the invalidation notifier, and
+	 * since we're exiting the loop holding the notifier lock,
+	 * nobody can proceed invalidating either.
+	 *
+	 * Note that we don't update the vma->userptr.notifier_seq since
+	 * we don't update the userptr pages.
+	 */
+	do {
+		down_read(&vm->userptr.notifier_lock);
+		if (!mmu_interval_read_retry(&vma->userptr.notifier,
+					     notifier_seq))
+			break;
+
+		up_read(&vm->userptr.notifier_lock);
+
+		if (userptr_update->bind)
+			return -EAGAIN;
+
+		notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
+	} while (true);
+
+	/* Inject errors to test_whether they are handled correctly */
+	if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) {
+		up_read(&vm->userptr.notifier_lock);
+		return -EAGAIN;
+	}
+
+	userptr_update->locked = true;
+
+	return 0;
+}
+
+static const struct xe_migrate_pt_update_ops bind_ops = {
+	.populate = xe_vm_populate_pgtable,
+	.pre_commit = xe_pt_pre_commit,
+};
+
+static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
+	.populate = xe_vm_populate_pgtable,
+	.pre_commit = xe_pt_userptr_pre_commit,
+};
+
+struct invalidation_fence {
+	struct xe_gt_tlb_invalidation_fence base;
+	struct xe_gt *gt;
+	struct xe_vma *vma;
+	struct dma_fence *fence;
+	struct dma_fence_cb cb;
+	struct work_struct work;
+};
+
+static const char *
+invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+	return "xe";
+}
+
+static const char *
+invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+	return "invalidation_fence";
+}
+
+static const struct dma_fence_ops invalidation_fence_ops = {
+	.get_driver_name = invalidation_fence_get_driver_name,
+	.get_timeline_name = invalidation_fence_get_timeline_name,
+};
+
+static void invalidation_fence_cb(struct dma_fence *fence,
+				  struct dma_fence_cb *cb)
+{
+	struct invalidation_fence *ifence =
+		container_of(cb, struct invalidation_fence, cb);
+
+	trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base);
+	if (!ifence->fence->error) {
+		queue_work(system_wq, &ifence->work);
+	} else {
+		ifence->base.base.error = ifence->fence->error;
+		dma_fence_signal(&ifence->base.base);
+		dma_fence_put(&ifence->base.base);
+	}
+	dma_fence_put(ifence->fence);
+}
+
+static void invalidation_fence_work_func(struct work_struct *w)
+{
+	struct invalidation_fence *ifence =
+		container_of(w, struct invalidation_fence, work);
+
+	trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
+	xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma);
+}
+
+static int invalidation_fence_init(struct xe_gt *gt,
+				   struct invalidation_fence *ifence,
+				   struct dma_fence *fence,
+				   struct xe_vma *vma)
+{
+	int ret;
+
+	trace_xe_gt_tlb_invalidation_fence_create(&ifence->base);
+
+	spin_lock_irq(&gt->tlb_invalidation.lock);
+	dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
+		       &gt->tlb_invalidation.lock,
+		       gt->tlb_invalidation.fence_context,
+		       ++gt->tlb_invalidation.fence_seqno);
+	spin_unlock_irq(&gt->tlb_invalidation.lock);
+
+	INIT_LIST_HEAD(&ifence->base.link);
+
+	dma_fence_get(&ifence->base.base);	/* Ref for caller */
+	ifence->fence = fence;
+	ifence->gt = gt;
+	ifence->vma = vma;
+
+	INIT_WORK(&ifence->work, invalidation_fence_work_func);
+	ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
+	if (ret == -ENOENT) {
+		dma_fence_put(ifence->fence);	/* Usually dropped in CB */
+		invalidation_fence_work_func(&ifence->work);
+	} else if (ret) {
+		dma_fence_put(&ifence->base.base);	/* Caller ref */
+		dma_fence_put(&ifence->base.base);	/* Creation ref */
+	}
+
+	xe_gt_assert(gt, !ret || ret == -ENOENT);
+
+	return ret && ret != -ENOENT ? ret : 0;
+}
+
+static void xe_pt_calc_rfence_interval(struct xe_vma *vma,
+				       struct xe_pt_migrate_pt_update *update,
+				       struct xe_vm_pgtable_update *entries,
+				       u32 num_entries)
+{
+	int i, level = 0;
+
+	for (i = 0; i < num_entries; i++) {
+		const struct xe_vm_pgtable_update *entry = &entries[i];
+
+		if (entry->pt->level > level)
+			level = entry->pt->level;
+	}
+
+	/* Greedy (non-optimal) calculation but simple */
+	update->base.start = ALIGN_DOWN(xe_vma_start(vma),
+					0x1ull << xe_pt_shift(level));
+	update->base.last = ALIGN(xe_vma_end(vma),
+				  0x1ull << xe_pt_shift(level)) - 1;
+}
+
+/**
+ * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma
+ * address range.
+ * @tile: The tile to bind for.
+ * @vma: The vma to bind.
+ * @q: The exec_queue with which to do pipelined page-table updates.
+ * @syncs: Entries to sync on before binding the built tree to the live vm tree.
+ * @num_syncs: Number of @sync entries.
+ * @rebind: Whether we're rebinding this vma to the same address range without
+ * an unbind in-between.
+ *
+ * This function builds a page-table tree (see xe_pt_stage_bind() for more
+ * information on page-table building), and the xe_vm_pgtable_update entries
+ * abstracting the operations needed to attach it to the main vm tree. It
+ * then takes the relevant locks and updates the metadata side of the main
+ * vm tree and submits the operations for pipelined attachment of the
+ * gpu page-table to the vm main tree, (which can be done either by the
+ * cpu and the GPU).
+ *
+ * Return: A valid dma-fence representing the pipelined attachment operation
+ * on success, an error pointer on error.
+ */
+struct dma_fence *
+__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
+		 struct xe_sync_entry *syncs, u32 num_syncs,
+		 bool rebind)
+{
+	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
+	struct xe_pt_migrate_pt_update bind_pt_update = {
+		.base = {
+			.ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops,
+			.vma = vma,
+			.tile_id = tile->id,
+		},
+		.bind = true,
+	};
+	struct xe_vm *vm = xe_vma_vm(vma);
+	u32 num_entries;
+	struct dma_fence *fence;
+	struct invalidation_fence *ifence = NULL;
+	struct xe_range_fence *rfence;
+	int err;
+
+	bind_pt_update.locked = false;
+	xe_bo_assert_held(xe_vma_bo(vma));
+	xe_vm_assert_held(vm);
+
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "Preparing bind, with range [%llx...%llx) engine %p.\n",
+	       xe_vma_start(vma), xe_vma_end(vma), q);
+
+	err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind);
+	if (err)
+		goto err;
+	xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
+
+	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
+	xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries,
+				   num_entries);
+
+	/*
+	 * If rebind, we have to invalidate TLB on !LR vms to invalidate
+	 * cached PTEs point to freed memory. on LR vms this is done
+	 * automatically when the context is re-enabled by the rebind worker,
+	 * or in fault mode it was invalidated on PTE zapping.
+	 *
+	 * If !rebind, and scratch enabled VMs, there is a chance the scratch
+	 * PTE is already cached in the TLB so it needs to be invalidated.
+	 * on !LR VMs this is done in the ring ops preceding a batch, but on
+	 * non-faulting LR, in particular on user-space batch buffer chaining,
+	 * it needs to be done here.
+	 */
+	if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) ||
+	    (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
+		ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
+		if (!ifence)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
+	if (!rfence) {
+		kfree(ifence);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fence = xe_migrate_update_pgtables(tile->migrate,
+					   vm, xe_vma_bo(vma), q,
+					   entries, num_entries,
+					   syncs, num_syncs,
+					   &bind_pt_update.base);
+	if (!IS_ERR(fence)) {
+		bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND;
+		LLIST_HEAD(deferred);
+		int err;
+
+		err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
+					    &xe_range_fence_kfree_ops,
+					    bind_pt_update.base.start,
+					    bind_pt_update.base.last, fence);
+		if (err)
+			dma_fence_wait(fence, false);
+
+		/* TLB invalidation must be done before signaling rebind */
+		if (ifence) {
+			int err = invalidation_fence_init(tile->primary_gt, ifence, fence,
+							  vma);
+			if (err) {
+				dma_fence_put(fence);
+				kfree(ifence);
+				return ERR_PTR(err);
+			}
+			fence = &ifence->base.base;
+		}
+
+		/* add shared fence now for pagetable delayed destroy */
+		dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind &&
+				   last_munmap_rebind ?
+				   DMA_RESV_USAGE_KERNEL :
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
+					   DMA_RESV_USAGE_BOOKKEEP);
+		xe_pt_commit_bind(vma, entries, num_entries, rebind,
+				  bind_pt_update.locked ? &deferred : NULL);
+
+		/* This vma is live (again?) now */
+		vma->tile_present |= BIT(tile->id);
+
+		if (bind_pt_update.locked) {
+			vma->userptr.initial_bind = true;
+			up_read(&vm->userptr.notifier_lock);
+			xe_bo_put_commit(&deferred);
+		}
+		if (!rebind && last_munmap_rebind &&
+		    xe_vm_in_preempt_fence_mode(vm))
+			xe_vm_queue_rebind_worker(vm);
+	} else {
+		kfree(rfence);
+		kfree(ifence);
+		if (bind_pt_update.locked)
+			up_read(&vm->userptr.notifier_lock);
+		xe_pt_abort_bind(vma, entries, num_entries);
+	}
+
+	return fence;
+
+err:
+	return ERR_PTR(err);
+}
+
+struct xe_pt_stage_unbind_walk {
+	/** @base: The pagewalk base-class. */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @tile: The tile we're unbinding from. */
+	struct xe_tile *tile;
+
+	/**
+	 * @modified_start: Walk range start, modified to include any
+	 * shared pagetables that we're the only user of and can thus
+	 * treat as private.
+	 */
+	u64 modified_start;
+	/** @modified_end: Walk range start, modified like @modified_start. */
+	u64 modified_end;
+
+	/* Output */
+	/* @wupd: Structure to track the page-table updates we're building */
+	struct xe_walk_update wupd;
+};
+
+/*
+ * Check whether this range is the only one populating this pagetable,
+ * and in that case, update the walk range checks so that higher levels don't
+ * view us as a shared pagetable.
+ */
+static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
+			     const struct xe_pt *child,
+			     enum page_walk_action *action,
+			     struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_unbind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	unsigned int shift = walk->shifts[level];
+	u64 size = 1ull << shift;
+
+	if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) &&
+	    ((next - addr) >> shift) == child->num_live) {
+		u64 size = 1ull << walk->shifts[level + 1];
+
+		*action = ACTION_CONTINUE;
+
+		if (xe_walk->modified_start >= addr)
+			xe_walk->modified_start = round_down(addr, size);
+		if (xe_walk->modified_end <= next)
+			xe_walk->modified_end = round_up(next, size);
+
+		return true;
+	}
+
+	return false;
+}
+
+static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
+				    unsigned int level, u64 addr, u64 next,
+				    struct xe_ptw **child,
+				    enum page_walk_action *action,
+				    struct xe_pt_walk *walk)
+{
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+
+	XE_WARN_ON(!*child);
+	XE_WARN_ON(!level && xe_child->is_compact);
+
+	xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
+
+	return 0;
+}
+
+static int
+xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
+				unsigned int level, u64 addr, u64 next,
+				struct xe_ptw **child,
+				enum page_walk_action *action,
+				struct xe_pt_walk *walk)
+{
+	struct xe_pt_stage_unbind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+	pgoff_t end_offset;
+	u64 size = 1ull << walk->shifts[--level];
+
+	if (!IS_ALIGNED(addr, size))
+		addr = xe_walk->modified_start;
+	if (!IS_ALIGNED(next, size))
+		next = xe_walk->modified_end;
+
+	/* Parent == *child is the root pt. Don't kill it. */
+	if (parent != *child &&
+	    xe_pt_check_kill(addr, next, level, xe_child, action, walk))
+		return 0;
+
+	if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset,
+				     &end_offset))
+		return 0;
+
+	(void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false);
+	xe_walk->wupd.updates[level].update->qwords = end_offset - offset;
+
+	return 0;
+}
+
+static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
+	.pt_entry = xe_pt_stage_unbind_entry,
+	.pt_post_descend = xe_pt_stage_unbind_post_descend,
+};
+
+/**
+ * xe_pt_stage_unbind() - Build page-table update structures for an unbind
+ * operation
+ * @tile: The tile we're unbinding for.
+ * @vma: The vma we're unbinding.
+ * @entries: Caller-provided storage for the update structures.
+ *
+ * Builds page-table update structures for an unbind operation. The function
+ * will attempt to remove all page-tables that we're the only user
+ * of, and for that to work, the unbind operation must be committed in the
+ * same critical section that blocks racing binds to the same page-table tree.
+ *
+ * Return: The number of entries used.
+ */
+static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma,
+				       struct xe_vm_pgtable_update *entries)
+{
+	struct xe_pt_stage_unbind_walk xe_walk = {
+		.base = {
+			.ops = &xe_pt_stage_unbind_ops,
+			.shifts = xe_normal_pt_shifts,
+			.max_level = XE_PT_HIGHEST_LEVEL,
+		},
+		.tile = tile,
+		.modified_start = xe_vma_start(vma),
+		.modified_end = xe_vma_end(vma),
+		.wupd.entries = entries,
+	};
+	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+
+	(void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma),
+				xe_vma_end(vma), &xe_walk.base);
+
+	return xe_walk.wupd.num_used_entries;
+}
+
+static void
+xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
+				  struct xe_tile *tile, struct iosys_map *map,
+				  void *ptr, u32 qword_ofs, u32 num_qwords,
+				  const struct xe_vm_pgtable_update *update)
+{
+	struct xe_vma *vma = pt_update->vma;
+	u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level);
+	int i;
+
+	if (map && map->is_iomem)
+		for (i = 0; i < num_qwords; ++i)
+			xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
+				  sizeof(u64), u64, empty);
+	else if (map)
+		memset64(map->vaddr + qword_ofs * sizeof(u64), empty,
+			 num_qwords);
+	else
+		memset64(ptr, empty, num_qwords);
+}
+
+static void
+xe_pt_commit_unbind(struct xe_vma *vma,
+		    struct xe_vm_pgtable_update *entries, u32 num_entries,
+		    struct llist_head *deferred)
+{
+	u32 j;
+
+	xe_pt_commit_locks_assert(vma);
+
+	for (j = 0; j < num_entries; ++j) {
+		struct xe_vm_pgtable_update *entry = &entries[j];
+		struct xe_pt *pt = entry->pt;
+
+		pt->num_live -= entry->qwords;
+		if (pt->level) {
+			struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
+			u32 i;
+
+			for (i = entry->ofs; i < entry->ofs + entry->qwords;
+			     i++) {
+				if (xe_pt_entry(pt_dir, i))
+					xe_pt_destroy(xe_pt_entry(pt_dir, i),
+						      xe_vma_vm(vma)->flags, deferred);
+
+				pt_dir->dir.entries[i] = NULL;
+			}
+		}
+	}
+}
+
+static const struct xe_migrate_pt_update_ops unbind_ops = {
+	.populate = xe_migrate_clear_pgtable_callback,
+	.pre_commit = xe_pt_pre_commit,
+};
+
+static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
+	.populate = xe_migrate_clear_pgtable_callback,
+	.pre_commit = xe_pt_userptr_pre_commit,
+};
+
+/**
+ * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma
+ * address range.
+ * @tile: The tile to unbind for.
+ * @vma: The vma to unbind.
+ * @q: The exec_queue with which to do pipelined page-table updates.
+ * @syncs: Entries to sync on before disconnecting the tree to be destroyed.
+ * @num_syncs: Number of @sync entries.
+ *
+ * This function builds a the xe_vm_pgtable_update entries abstracting the
+ * operations needed to detach the page-table tree to be destroyed from the
+ * man vm tree.
+ * It then takes the relevant locks and submits the operations for
+ * pipelined detachment of the gpu page-table from  the vm main tree,
+ * (which can be done either by the cpu and the GPU), Finally it frees the
+ * detached page-table tree.
+ *
+ * Return: A valid dma-fence representing the pipelined detachment operation
+ * on success, an error pointer on error.
+ */
+struct dma_fence *
+__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
+		   struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
+	struct xe_pt_migrate_pt_update unbind_pt_update = {
+		.base = {
+			.ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops :
+			&unbind_ops,
+			.vma = vma,
+			.tile_id = tile->id,
+		},
+	};
+	struct xe_vm *vm = xe_vma_vm(vma);
+	u32 num_entries;
+	struct dma_fence *fence = NULL;
+	struct invalidation_fence *ifence;
+	struct xe_range_fence *rfence;
+
+	LLIST_HEAD(deferred);
+
+	xe_bo_assert_held(xe_vma_bo(vma));
+	xe_vm_assert_held(vm);
+
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "Preparing unbind, with range [%llx...%llx) engine %p.\n",
+	       xe_vma_start(vma), xe_vma_end(vma), q);
+
+	num_entries = xe_pt_stage_unbind(tile, vma, entries);
+	xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
+
+	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
+	xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
+				   num_entries);
+
+	ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
+	if (!ifence)
+		return ERR_PTR(-ENOMEM);
+
+	rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
+	if (!rfence) {
+		kfree(ifence);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/*
+	 * Even if we were already evicted and unbind to destroy, we need to
+	 * clear again here. The eviction may have updated pagetables at a
+	 * lower level, because it needs to be more conservative.
+	 */
+	fence = xe_migrate_update_pgtables(tile->migrate,
+					   vm, NULL, q ? q :
+					   vm->q[tile->id],
+					   entries, num_entries,
+					   syncs, num_syncs,
+					   &unbind_pt_update.base);
+	if (!IS_ERR(fence)) {
+		int err;
+
+		err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
+					    &xe_range_fence_kfree_ops,
+					    unbind_pt_update.base.start,
+					    unbind_pt_update.base.last, fence);
+		if (err)
+			dma_fence_wait(fence, false);
+
+		/* TLB invalidation must be done before signaling unbind */
+		err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma);
+		if (err) {
+			dma_fence_put(fence);
+			kfree(ifence);
+			return ERR_PTR(err);
+		}
+		fence = &ifence->base.base;
+
+		/* add shared fence now for pagetable delayed destroy */
+		dma_resv_add_fence(xe_vm_resv(vm), fence,
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		/* This fence will be installed by caller when doing eviction */
+		if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
+					   DMA_RESV_USAGE_BOOKKEEP);
+		xe_pt_commit_unbind(vma, entries, num_entries,
+				    unbind_pt_update.locked ? &deferred : NULL);
+		vma->tile_present &= ~BIT(tile->id);
+	} else {
+		kfree(rfence);
+		kfree(ifence);
+	}
+
+	if (!vma->tile_present)
+		list_del_init(&vma->combined_links.rebind);
+
+	if (unbind_pt_update.locked) {
+		xe_tile_assert(tile, xe_vma_is_userptr(vma));
+
+		if (!vma->tile_present) {
+			spin_lock(&vm->userptr.invalidated_lock);
+			list_del_init(&vma->userptr.invalidate_link);
+			spin_unlock(&vm->userptr.invalidated_lock);
+		}
+		up_read(&vm->userptr.notifier_lock);
+		xe_bo_put_commit(&deferred);
+	}
+
+	return fence;
+}
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
new file mode 100644
index 000000000000..71a4fbfcff43
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_PT_H_
+#define _XE_PT_H_
+
+#include <linux/types.h>
+
+#include "xe_pt_types.h"
+
+struct dma_fence;
+struct xe_bo;
+struct xe_device;
+struct xe_exec_queue;
+struct xe_sync_entry;
+struct xe_tile;
+struct xe_vm;
+struct xe_vma;
+
+/* Largest huge pte is currently 1GiB. May become device dependent. */
+#define MAX_HUGEPTE_LEVEL 2
+
+#define xe_pt_write(xe, map, idx, data) \
+	xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data)
+
+unsigned int xe_pt_shift(unsigned int level);
+
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
+			   unsigned int level);
+
+void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
+			  struct xe_pt *pt);
+
+void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred);
+
+struct dma_fence *
+__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
+		 struct xe_sync_entry *syncs, u32 num_syncs,
+		 bool rebind);
+
+struct dma_fence *
+__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
+		   struct xe_sync_entry *syncs, u32 num_syncs);
+
+bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
new file mode 100644
index 000000000000..cee70cb0f014
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PT_TYPES_H_
+#define _XE_PT_TYPES_H_
+
+#include <linux/types.h>
+
+#include "xe_pt_walk.h"
+
+struct xe_bo;
+struct xe_device;
+struct xe_vma;
+
+enum xe_cache_level {
+	XE_CACHE_NONE,
+	XE_CACHE_WT,
+	XE_CACHE_WB,
+	XE_CACHE_NONE_COMPRESSION, /*UC + COH_NONE + COMPRESSION */
+	__XE_CACHE_LEVEL_COUNT,
+};
+
+#define XE_VM_MAX_LEVEL 4
+
+struct xe_pt {
+	struct xe_ptw base;
+	struct xe_bo *bo;
+	unsigned int level;
+	unsigned int num_live;
+	bool rebind;
+	bool is_compact;
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+	/** addr: Virtual address start address of the PT. */
+	u64 addr;
+#endif
+};
+
+struct xe_pt_ops {
+	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset,
+			     u16 pat_index, u32 pt_level);
+	u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma,
+			      u16 pat_index, u32 pt_level);
+	u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr,
+			       u16 pat_index,
+			       u32 pt_level, bool devmem, u64 flags);
+	u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset,
+			     u16 pat_index);
+};
+
+struct xe_pt_entry {
+	struct xe_pt *pt;
+	u64 pte;
+};
+
+struct xe_vm_pgtable_update {
+	/** @bo: page table bo to write to */
+	struct xe_bo *pt_bo;
+
+	/** @ofs: offset inside this PTE to begin writing to (in qwords) */
+	u32 ofs;
+
+	/** @qwords: number of PTE's to write */
+	u32 qwords;
+
+	/** @pt: opaque pointer useful for the caller of xe_migrate_update_pgtables */
+	struct xe_pt *pt;
+
+	/** @pt_entries: Newly added pagetable entries */
+	struct xe_pt_entry *pt_entries;
+
+	/** @flags: Target flags */
+	u32 flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c
new file mode 100644
index 000000000000..8f6c8d063f39
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_walk.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#include "xe_pt_walk.h"
+
+/**
+ * DOC: GPU page-table tree walking.
+ * The utilities in this file are similar to the CPU page-table walk
+ * utilities in mm/pagewalk.c. The main difference is that we distinguish
+ * the various levels of a page-table tree with an unsigned integer rather
+ * than by name. 0 is the lowest level, and page-tables with level 0 can
+ * not be directories pointing to lower levels, whereas all other levels
+ * can. The user of the utilities determines the highest level.
+ *
+ * Nomenclature:
+ * Each struct xe_ptw, regardless of level is referred to as a page table, and
+ * multiple page tables typically form a page table tree with page tables at
+ * intermediate levels being page directories pointing at page tables at lower
+ * levels. A shared page table for a given address range is a page-table which
+ * is neither fully within nor fully outside the address range and that can
+ * thus be shared by two or more address ranges.
+ *
+ * Please keep this code generic so that it can used as a drm-wide page-
+ * table walker should other drivers find use for it.
+ */
+static u64 xe_pt_addr_end(u64 addr, u64 end, unsigned int level,
+			  const struct xe_pt_walk *walk)
+{
+	u64 size = 1ull << walk->shifts[level];
+	u64 tmp = round_up(addr + 1, size);
+
+	return min_t(u64, tmp, end);
+}
+
+static bool xe_pt_next(pgoff_t *offset, u64 *addr, u64 next, u64 end,
+		       unsigned int level, const struct xe_pt_walk *walk)
+{
+	pgoff_t step = 1;
+
+	/* Shared pt walk skips to the last pagetable */
+	if (unlikely(walk->shared_pt_mode)) {
+		unsigned int shift = walk->shifts[level];
+		u64 skip_to = round_down(end, 1ull << shift);
+
+		if (skip_to > next) {
+			step += (skip_to - next) >> shift;
+			next = skip_to;
+		}
+	}
+
+	*addr = next;
+	*offset += step;
+
+	return next != end;
+}
+
+/**
+ * xe_pt_walk_range() - Walk a range of a gpu page table tree with callbacks
+ * for each page-table entry in all levels.
+ * @parent: The root page table for walk start.
+ * @level: The root page table level.
+ * @addr: Virtual address start.
+ * @end: Virtual address end + 1.
+ * @walk: Walk info.
+ *
+ * Similar to the CPU page-table walker, this is a helper to walk
+ * a gpu page table and call a provided callback function for each entry.
+ *
+ * Return: 0 on success, negative error code on error. The error is
+ * propagated from the callback and on error the walk is terminated.
+ */
+int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
+		     u64 addr, u64 end, struct xe_pt_walk *walk)
+{
+	pgoff_t offset = xe_pt_offset(addr, level, walk);
+	struct xe_ptw **entries = parent->dir ? parent->dir->entries : NULL;
+	const struct xe_pt_walk_ops *ops = walk->ops;
+	enum page_walk_action action;
+	struct xe_ptw *child;
+	int err = 0;
+	u64 next;
+
+	do {
+		next = xe_pt_addr_end(addr, end, level, walk);
+		if (walk->shared_pt_mode && xe_pt_covers(addr, next, level,
+							 walk))
+			continue;
+again:
+		action = ACTION_SUBTREE;
+		child = entries ? entries[offset] : NULL;
+		err = ops->pt_entry(parent, offset, level, addr, next,
+				    &child, &action, walk);
+		if (err)
+			break;
+
+		/* Probably not needed yet for gpu pagetable walk. */
+		if (unlikely(action == ACTION_AGAIN))
+			goto again;
+
+		if (likely(!level || !child || action == ACTION_CONTINUE))
+			continue;
+
+		err = xe_pt_walk_range(child, level - 1, addr, next, walk);
+
+		if (!err && ops->pt_post_descend)
+			err = ops->pt_post_descend(parent, offset, level, addr,
+						   next, &child, &action, walk);
+		if (err)
+			break;
+
+	} while (xe_pt_next(&offset, &addr, next, end, level, walk));
+
+	return err;
+}
+
+/**
+ * xe_pt_walk_shared() - Walk shared page tables of a page-table tree.
+ * @parent: Root page table directory.
+ * @level: Level of the root.
+ * @addr: Start address.
+ * @end: Last address + 1.
+ * @walk: Walk info.
+ *
+ * This function is similar to xe_pt_walk_range() but it skips page tables
+ * that are private to the range. Since the root (or @parent) page table is
+ * typically also a shared page table this function is different in that it
+ * calls the pt_entry callback and the post_descend callback also for the
+ * root. The root can be detected in the callbacks by checking whether
+ * parent == *child.
+ * Walking only the shared page tables is common for unbind-type operations
+ * where the page-table entries for an address range are cleared or detached
+ * from the main page-table tree.
+ *
+ * Return: 0 on success, negative error code on error: If a callback
+ * returns an error, the walk will be terminated and the error returned by
+ * this function.
+ */
+int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level,
+		      u64 addr, u64 end, struct xe_pt_walk *walk)
+{
+	const struct xe_pt_walk_ops *ops = walk->ops;
+	enum page_walk_action action = ACTION_SUBTREE;
+	struct xe_ptw *child = parent;
+	int err;
+
+	walk->shared_pt_mode = true;
+	err = walk->ops->pt_entry(parent, 0, level + 1, addr, end,
+				  &child, &action, walk);
+
+	if (err || action != ACTION_SUBTREE)
+		return err;
+
+	err = xe_pt_walk_range(parent, level, addr, end, walk);
+	if (!err && ops->pt_post_descend) {
+		err = ops->pt_post_descend(parent, 0, level + 1, addr, end,
+					   &child, &action, walk);
+	}
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h
new file mode 100644
index 000000000000..ec3d1e9efa6d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_walk.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef __XE_PT_WALK__
+#define __XE_PT_WALK__
+
+#include <linux/pagewalk.h>
+#include <linux/types.h>
+
+struct xe_ptw_dir;
+
+/**
+ * struct xe_ptw - base class for driver pagetable subclassing.
+ * @dir: Pointer to an array of children if any.
+ *
+ * Drivers could subclass this, and if it's a page-directory, typically
+ * embed the xe_ptw_dir::entries array in the same allocation.
+ */
+struct xe_ptw {
+	struct xe_ptw_dir *dir;
+};
+
+/**
+ * struct xe_ptw_dir - page directory structure
+ * @entries: Array holding page directory children.
+ *
+ * It is the responsibility of the user to ensure @entries is
+ * correctly sized.
+ */
+struct xe_ptw_dir {
+	struct xe_ptw *entries[0];
+};
+
+/**
+ * struct xe_pt_walk - Embeddable struct for walk parameters
+ */
+struct xe_pt_walk {
+	/** @ops: The walk ops used for the pagewalk */
+	const struct xe_pt_walk_ops *ops;
+	/**
+	 * @shifts: Array of page-table entry shifts used for the
+	 * different levels, starting out with the leaf level 0
+	 * page-shift as the first entry. It's legal for this pointer to be
+	 * changed during the walk.
+	 */
+	const u64 *shifts;
+	/** @max_level: Highest populated level in @sizes */
+	unsigned int max_level;
+	/**
+	 * @shared_pt_mode: Whether to skip all entries that are private
+	 * to the address range and called only for entries that are
+	 * shared with other address ranges. Such entries are referred to
+	 * as shared pagetables.
+	 */
+	bool shared_pt_mode;
+};
+
+/**
+ * typedef xe_pt_entry_fn - gpu page-table-walk callback-function
+ * @parent: The parent page.table.
+ * @offset: The offset (number of entries) into the page table.
+ * @level: The level of @parent.
+ * @addr: The virtual address.
+ * @next: The virtual address for the next call, or end address.
+ * @child: Pointer to pointer to child page-table at this @offset. The
+ * function may modify the value pointed to if, for example, allocating a
+ * child page table.
+ * @action: The walk action to take upon return. See <linux/pagewalk.h>.
+ * @walk: The walk parameters.
+ */
+typedef int (*xe_pt_entry_fn)(struct xe_ptw *parent, pgoff_t offset,
+			      unsigned int level, u64 addr, u64 next,
+			      struct xe_ptw **child,
+			      enum page_walk_action *action,
+			      struct xe_pt_walk *walk);
+
+/**
+ * struct xe_pt_walk_ops - Walk callbacks.
+ */
+struct xe_pt_walk_ops {
+	/**
+	 * @pt_entry: Callback to be called for each page table entry prior
+	 * to descending to the next level. The returned value of the action
+	 * function parameter is honored.
+	 */
+	xe_pt_entry_fn pt_entry;
+	/**
+	 * @pt_post_descend: Callback to be called for each page table entry
+	 * after return from descending to the next level. The returned value
+	 * of the action function parameter is ignored.
+	 */
+	xe_pt_entry_fn pt_post_descend;
+};
+
+int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
+		     u64 addr, u64 end, struct xe_pt_walk *walk);
+
+int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level,
+		      u64 addr, u64 end, struct xe_pt_walk *walk);
+
+/**
+ * xe_pt_covers - Whether the address range covers an entire entry in @level
+ * @addr: Start of the range.
+ * @end: End of range + 1.
+ * @level: Page table level.
+ * @walk: Page table walk info.
+ *
+ * This function is a helper to aid in determining whether a leaf page table
+ * entry can be inserted at this @level.
+ *
+ * Return: Whether the range provided covers exactly an entry at this level.
+ */
+static inline bool xe_pt_covers(u64 addr, u64 end, unsigned int level,
+				const struct xe_pt_walk *walk)
+{
+	u64 pt_size = 1ull << walk->shifts[level];
+
+	return end - addr == pt_size && IS_ALIGNED(addr, pt_size);
+}
+
+/**
+ * xe_pt_num_entries: Number of page-table entries of a given range at this
+ * level
+ * @addr: Start address.
+ * @end: End address.
+ * @level: Page table level.
+ * @walk: Walk info.
+ *
+ * Return: The number of page table entries at this level between @start and
+ * @end.
+ */
+static inline pgoff_t
+xe_pt_num_entries(u64 addr, u64 end, unsigned int level,
+		  const struct xe_pt_walk *walk)
+{
+	u64 pt_size = 1ull << walk->shifts[level];
+
+	return (round_up(end, pt_size) - round_down(addr, pt_size)) >>
+		walk->shifts[level];
+}
+
+/**
+ * xe_pt_offset: Offset of the page-table entry for a given address.
+ * @addr: The address.
+ * @level: Page table level.
+ * @walk: Walk info.
+ *
+ * Return: The page table entry offset for the given address in a
+ * page table with size indicated by @level.
+ */
+static inline pgoff_t
+xe_pt_offset(u64 addr, unsigned int level, const struct xe_pt_walk *walk)
+{
+	if (level < walk->max_level)
+		addr &= ((1ull << walk->shifts[level + 1]) - 1);
+
+	return addr >> walk->shifts[level];
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
new file mode 100644
index 000000000000..9b35673b286c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_query.h"
+
+#include <linux/nospec.h>
+#include <linux/sched/clock.h>
+
+#include <drm/ttm/ttm_placement.h>
+#include <drm/xe_drm.h>
+
+#include "regs/xe_engine_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_guc_hwconfig.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_ttm_vram_mgr.h"
+
+static const u16 xe_to_user_engine_class[] = {
+	[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
+	[XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY,
+	[XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
+	[XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
+};
+
+static const enum xe_engine_class user_to_xe_engine_class[] = {
+	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
+static size_t calc_hw_engine_info_size(struct xe_device *xe)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct xe_gt *gt;
+	u8 gt_id;
+	int i = 0;
+
+	for_each_gt(gt, xe, gt_id)
+		for_each_hw_engine(hwe, gt, id) {
+			if (xe_hw_engine_is_reserved(hwe))
+				continue;
+			i++;
+		}
+
+	return sizeof(struct drm_xe_query_engines) +
+		i * sizeof(struct drm_xe_engine);
+}
+
+typedef u64 (*__ktime_func_t)(void);
+static __ktime_func_t __clock_id_to_func(clockid_t clk_id)
+{
+	/*
+	 * Use logic same as the perf subsystem to allow user to select the
+	 * reference clock id to be used for timestamps.
+	 */
+	switch (clk_id) {
+	case CLOCK_MONOTONIC:
+		return &ktime_get_ns;
+	case CLOCK_MONOTONIC_RAW:
+		return &ktime_get_raw_ns;
+	case CLOCK_REALTIME:
+		return &ktime_get_real_ns;
+	case CLOCK_BOOTTIME:
+		return &ktime_get_boottime_ns;
+	case CLOCK_TAI:
+		return &ktime_get_clocktai_ns;
+	default:
+		return NULL;
+	}
+}
+
+static void
+__read_timestamps(struct xe_gt *gt,
+		  struct xe_reg lower_reg,
+		  struct xe_reg upper_reg,
+		  u64 *engine_ts,
+		  u64 *cpu_ts,
+		  u64 *cpu_delta,
+		  __ktime_func_t cpu_clock)
+{
+	u32 upper, lower, old_upper, loop = 0;
+
+	upper = xe_mmio_read32(gt, upper_reg);
+	do {
+		*cpu_delta = local_clock();
+		*cpu_ts = cpu_clock();
+		lower = xe_mmio_read32(gt, lower_reg);
+		*cpu_delta = local_clock() - *cpu_delta;
+		old_upper = upper;
+		upper = xe_mmio_read32(gt, upper_reg);
+	} while (upper != old_upper && loop++ < 2);
+
+	*engine_ts = (u64)upper << 32 | lower;
+}
+
+static int
+query_engine_cycles(struct xe_device *xe,
+		    struct drm_xe_device_query *query)
+{
+	struct drm_xe_query_engine_cycles __user *query_ptr;
+	struct drm_xe_engine_class_instance *eci;
+	struct drm_xe_query_engine_cycles resp;
+	size_t size = sizeof(resp);
+	__ktime_func_t cpu_clock;
+	struct xe_hw_engine *hwe;
+	struct xe_gt *gt;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	query_ptr = u64_to_user_ptr(query->data);
+	if (copy_from_user(&resp, query_ptr, size))
+		return -EFAULT;
+
+	cpu_clock = __clock_id_to_func(resp.clockid);
+	if (!cpu_clock)
+		return -EINVAL;
+
+	eci = &resp.eci;
+	if (eci->gt_id > XE_MAX_GT_PER_TILE)
+		return -EINVAL;
+
+	gt = xe_device_get_gt(xe, eci->gt_id);
+	if (!gt)
+		return -EINVAL;
+
+	if (eci->engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
+		return -EINVAL;
+
+	hwe = xe_gt_hw_engine(gt, user_to_xe_engine_class[eci->engine_class],
+			      eci->engine_instance, true);
+	if (!hwe)
+		return -EINVAL;
+
+	xe_device_mem_access_get(xe);
+	xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+
+	__read_timestamps(gt,
+			  RING_TIMESTAMP(hwe->mmio_base),
+			  RING_TIMESTAMP_UDW(hwe->mmio_base),
+			  &resp.engine_cycles,
+			  &resp.cpu_timestamp,
+			  &resp.cpu_delta,
+			  cpu_clock);
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	xe_device_mem_access_put(xe);
+	resp.width = 36;
+
+	/* Only write to the output fields of user query */
+	if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp))
+		return -EFAULT;
+
+	if (put_user(resp.cpu_delta, &query_ptr->cpu_delta))
+		return -EFAULT;
+
+	if (put_user(resp.engine_cycles, &query_ptr->engine_cycles))
+		return -EFAULT;
+
+	if (put_user(resp.width, &query_ptr->width))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int query_engines(struct xe_device *xe,
+			 struct drm_xe_device_query *query)
+{
+	size_t size = calc_hw_engine_info_size(xe);
+	struct drm_xe_query_engines __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct drm_xe_query_engines *engines;
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct xe_gt *gt;
+	u8 gt_id;
+	int i = 0;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	engines = kmalloc(size, GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	for_each_gt(gt, xe, gt_id)
+		for_each_hw_engine(hwe, gt, id) {
+			if (xe_hw_engine_is_reserved(hwe))
+				continue;
+
+			engines->engines[i].instance.engine_class =
+				xe_to_user_engine_class[hwe->class];
+			engines->engines[i].instance.engine_instance =
+				hwe->logical_instance;
+			engines->engines[i].instance.gt_id = gt->info.id;
+			engines->engines[i].instance.pad = 0;
+			memset(engines->engines[i].reserved, 0,
+			       sizeof(engines->engines[i].reserved));
+
+			i++;
+		}
+
+	engines->pad = 0;
+	engines->num_engines = i;
+
+	if (copy_to_user(query_ptr, engines, size)) {
+		kfree(engines);
+		return -EFAULT;
+	}
+	kfree(engines);
+
+	return 0;
+}
+
+static size_t calc_mem_regions_size(struct xe_device *xe)
+{
+	u32 num_managers = 1;
+	int i;
+
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i)
+		if (ttm_manager_type(&xe->ttm, i))
+			num_managers++;
+
+	return offsetof(struct drm_xe_query_mem_regions, mem_regions[num_managers]);
+}
+
+static int query_mem_regions(struct xe_device *xe,
+			    struct drm_xe_device_query *query)
+{
+	size_t size = calc_mem_regions_size(xe);
+	struct drm_xe_query_mem_regions *mem_regions;
+	struct drm_xe_query_mem_regions __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct ttm_resource_manager *man;
+	int ret, i;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	mem_regions = kzalloc(size, GFP_KERNEL);
+	if (XE_IOCTL_DBG(xe, !mem_regions))
+		return -ENOMEM;
+
+	man = ttm_manager_type(&xe->ttm, XE_PL_TT);
+	mem_regions->mem_regions[0].mem_class = DRM_XE_MEM_REGION_CLASS_SYSMEM;
+	/*
+	 * The instance needs to be a unique number that represents the index
+	 * in the placement mask used at xe_gem_create_ioctl() for the
+	 * xe_bo_create() placement.
+	 */
+	mem_regions->mem_regions[0].instance = 0;
+	mem_regions->mem_regions[0].min_page_size = PAGE_SIZE;
+	mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT;
+	if (perfmon_capable())
+		mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
+	mem_regions->num_mem_regions = 1;
+
+	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
+		man = ttm_manager_type(&xe->ttm, i);
+		if (man) {
+			mem_regions->mem_regions[mem_regions->num_mem_regions].mem_class =
+				DRM_XE_MEM_REGION_CLASS_VRAM;
+			mem_regions->mem_regions[mem_regions->num_mem_regions].instance =
+				mem_regions->num_mem_regions;
+			mem_regions->mem_regions[mem_regions->num_mem_regions].min_page_size =
+				xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ?
+				SZ_64K : PAGE_SIZE;
+			mem_regions->mem_regions[mem_regions->num_mem_regions].total_size =
+				man->size;
+
+			if (perfmon_capable()) {
+				xe_ttm_vram_get_used(man,
+					&mem_regions->mem_regions
+					[mem_regions->num_mem_regions].used,
+					&mem_regions->mem_regions
+					[mem_regions->num_mem_regions].cpu_visible_used);
+			}
+
+			mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size =
+				xe_ttm_vram_get_cpu_visible_size(man);
+			mem_regions->num_mem_regions++;
+		}
+	}
+
+	if (!copy_to_user(query_ptr, mem_regions, size))
+		ret = 0;
+	else
+		ret = -ENOSPC;
+
+	kfree(mem_regions);
+	return ret;
+}
+
+static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+	const u32 num_params = DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1;
+	size_t size =
+		sizeof(struct drm_xe_query_config) + num_params * sizeof(u64);
+	struct drm_xe_query_config __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct drm_xe_query_config *config;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	config = kzalloc(size, GFP_KERNEL);
+	if (!config)
+		return -ENOMEM;
+
+	config->num_params = num_params;
+	config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] =
+		xe->info.devid | (xe->info.revid << 16);
+	if (xe_device_get_root_tile(xe)->mem.vram.usable_size)
+		config->info[DRM_XE_QUERY_CONFIG_FLAGS] =
+			DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM;
+	config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] =
+		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+	config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
+	config->info[DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] =
+		xe_exec_queue_device_get_max_priority(xe);
+
+	if (copy_to_user(query_ptr, config, size)) {
+		kfree(config);
+		return -EFAULT;
+	}
+	kfree(config);
+
+	return 0;
+}
+
+static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+	struct xe_gt *gt;
+	size_t size = sizeof(struct drm_xe_query_gt_list) +
+		xe->info.gt_count * sizeof(struct drm_xe_gt);
+	struct drm_xe_query_gt_list __user *query_ptr =
+		u64_to_user_ptr(query->data);
+	struct drm_xe_query_gt_list *gt_list;
+	u8 id;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	gt_list = kzalloc(size, GFP_KERNEL);
+	if (!gt_list)
+		return -ENOMEM;
+
+	gt_list->num_gt = xe->info.gt_count;
+
+	for_each_gt(gt, xe, id) {
+		if (xe_gt_is_media_type(gt))
+			gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA;
+		else
+			gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN;
+		gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id;
+		gt_list->gt_list[id].gt_id = gt->info.id;
+		gt_list->gt_list[id].reference_clock = gt->info.reference_clock;
+		/*
+		 * The mem_regions indexes in the mask below need to
+		 * directly identify the struct
+		 * drm_xe_query_mem_regions' instance constructed at
+		 * query_mem_regions()
+		 *
+		 * For our current platforms:
+		 * Bit 0 -> System Memory
+		 * Bit 1 -> VRAM0 on Tile0
+		 * Bit 2 -> VRAM1 on Tile1
+		 * However the uAPI is generic and it's userspace's
+		 * responsibility to check the mem_class, without any
+		 * assumption.
+		 */
+		if (!IS_DGFX(xe))
+			gt_list->gt_list[id].near_mem_regions = 0x1;
+		else
+			gt_list->gt_list[id].near_mem_regions =
+				BIT(gt_to_tile(gt)->id) << 1;
+		gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^
+			gt_list->gt_list[id].near_mem_regions;
+	}
+
+	if (copy_to_user(query_ptr, gt_list, size)) {
+		kfree(gt_list);
+		return -EFAULT;
+	}
+	kfree(gt_list);
+
+	return 0;
+}
+
+static int query_hwconfig(struct xe_device *xe,
+			  struct drm_xe_device_query *query)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	size_t size = xe_guc_hwconfig_size(&gt->uc.guc);
+	void __user *query_ptr = u64_to_user_ptr(query->data);
+	void *hwconfig;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	hwconfig = kzalloc(size, GFP_KERNEL);
+	if (!hwconfig)
+		return -ENOMEM;
+
+	xe_device_mem_access_get(xe);
+	xe_guc_hwconfig_copy(&gt->uc.guc, hwconfig);
+	xe_device_mem_access_put(xe);
+
+	if (copy_to_user(query_ptr, hwconfig, size)) {
+		kfree(hwconfig);
+		return -EFAULT;
+	}
+	kfree(hwconfig);
+
+	return 0;
+}
+
+static size_t calc_topo_query_size(struct xe_device *xe)
+{
+	return xe->info.gt_count *
+		(3 * sizeof(struct drm_xe_query_topology_mask) +
+		 sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
+		 sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
+		 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
+}
+
+static void __user *copy_mask(void __user *ptr,
+			      struct drm_xe_query_topology_mask *topo,
+			      void *mask, size_t mask_size)
+{
+	topo->num_bytes = mask_size;
+
+	if (copy_to_user(ptr, topo, sizeof(*topo)))
+		return ERR_PTR(-EFAULT);
+	ptr += sizeof(topo);
+
+	if (copy_to_user(ptr, mask, mask_size))
+		return ERR_PTR(-EFAULT);
+	ptr += mask_size;
+
+	return ptr;
+}
+
+static int query_gt_topology(struct xe_device *xe,
+			     struct drm_xe_device_query *query)
+{
+	void __user *query_ptr = u64_to_user_ptr(query->data);
+	size_t size = calc_topo_query_size(xe);
+	struct drm_xe_query_topology_mask topo;
+	struct xe_gt *gt;
+	int id;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	for_each_gt(gt, xe, id) {
+		topo.gt_id = id;
+
+		topo.type = DRM_XE_TOPO_DSS_GEOMETRY;
+		query_ptr = copy_mask(query_ptr, &topo,
+				      gt->fuse_topo.g_dss_mask,
+				      sizeof(gt->fuse_topo.g_dss_mask));
+		if (IS_ERR(query_ptr))
+			return PTR_ERR(query_ptr);
+
+		topo.type = DRM_XE_TOPO_DSS_COMPUTE;
+		query_ptr = copy_mask(query_ptr, &topo,
+				      gt->fuse_topo.c_dss_mask,
+				      sizeof(gt->fuse_topo.c_dss_mask));
+		if (IS_ERR(query_ptr))
+			return PTR_ERR(query_ptr);
+
+		topo.type = DRM_XE_TOPO_EU_PER_DSS;
+		query_ptr = copy_mask(query_ptr, &topo,
+				      gt->fuse_topo.eu_mask_per_dss,
+				      sizeof(gt->fuse_topo.eu_mask_per_dss));
+		if (IS_ERR(query_ptr))
+			return PTR_ERR(query_ptr);
+	}
+
+	return 0;
+}
+
+static int (* const xe_query_funcs[])(struct xe_device *xe,
+				      struct drm_xe_device_query *query) = {
+	query_engines,
+	query_mem_regions,
+	query_config,
+	query_gt_list,
+	query_hwconfig,
+	query_gt_topology,
+	query_engine_cycles,
+};
+
+int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct drm_xe_device_query *query = data;
+	u32 idx;
+
+	if (XE_IOCTL_DBG(xe, query->extensions) ||
+	    XE_IOCTL_DBG(xe, query->reserved[0] || query->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, query->query >= ARRAY_SIZE(xe_query_funcs)))
+		return -EINVAL;
+
+	idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs));
+	if (XE_IOCTL_DBG(xe, !xe_query_funcs[idx]))
+		return -EINVAL;
+
+	return xe_query_funcs[idx](xe, query);
+}
diff --git a/drivers/gpu/drm/xe/xe_query.h b/drivers/gpu/drm/xe/xe_query.h
new file mode 100644
index 000000000000..beeb7a8192b4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_query.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_QUERY_H_
+#define _XE_QUERY_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_range_fence.c b/drivers/gpu/drm/xe/xe_range_fence.c
new file mode 100644
index 000000000000..d35d9ec58e86
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_range_fence.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/slab.h>
+
+#include "xe_macros.h"
+#include "xe_range_fence.h"
+
+#define XE_RANGE_TREE_START(_node)	((_node)->start)
+#define XE_RANGE_TREE_LAST(_node)	((_node)->last)
+
+INTERVAL_TREE_DEFINE(struct xe_range_fence, rb, u64, __subtree_last,
+		     XE_RANGE_TREE_START, XE_RANGE_TREE_LAST, static,
+		     xe_range_fence_tree);
+
+static void
+xe_range_fence_signal_notify(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct xe_range_fence *rfence = container_of(cb, typeof(*rfence), cb);
+	struct xe_range_fence_tree *tree = rfence->tree;
+
+	llist_add(&rfence->link, &tree->list);
+}
+
+static bool __xe_range_fence_tree_cleanup(struct xe_range_fence_tree *tree)
+{
+	struct llist_node *node = llist_del_all(&tree->list);
+	struct xe_range_fence *rfence, *next;
+
+	llist_for_each_entry_safe(rfence, next, node, link) {
+		xe_range_fence_tree_remove(rfence, &tree->root);
+		dma_fence_put(rfence->fence);
+		kfree(rfence);
+	}
+
+	return !!node;
+}
+
+/**
+ * xe_range_fence_insert() - range fence insert
+ * @tree: range fence tree to insert intoi
+ * @rfence: range fence
+ * @ops: range fence ops
+ * @start: start address of range fence
+ * @last: last address of range fence
+ * @fence: dma fence which signals range fence can be removed + freed
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+int xe_range_fence_insert(struct xe_range_fence_tree *tree,
+			  struct xe_range_fence *rfence,
+			  const struct xe_range_fence_ops *ops,
+			  u64 start, u64 last, struct dma_fence *fence)
+{
+	int err = 0;
+
+	__xe_range_fence_tree_cleanup(tree);
+
+	if (dma_fence_is_signaled(fence))
+		goto free;
+
+	rfence->ops = ops;
+	rfence->start = start;
+	rfence->last = last;
+	rfence->tree = tree;
+	rfence->fence = dma_fence_get(fence);
+	err = dma_fence_add_callback(fence, &rfence->cb,
+				     xe_range_fence_signal_notify);
+	if (err == -ENOENT) {
+		dma_fence_put(fence);
+		err = 0;
+		goto free;
+	} else if (err == 0) {
+		xe_range_fence_tree_insert(rfence, &tree->root);
+		return 0;
+	}
+
+free:
+	if (ops->free)
+		ops->free(rfence);
+
+	return err;
+}
+
+static void xe_range_fence_tree_remove_all(struct xe_range_fence_tree *tree)
+{
+	struct xe_range_fence *rfence;
+	bool retry = true;
+
+	rfence = xe_range_fence_tree_iter_first(&tree->root, 0, U64_MAX);
+	while (rfence) {
+		/* Should be ok with the minimalistic callback */
+		if (dma_fence_remove_callback(rfence->fence, &rfence->cb))
+			llist_add(&rfence->link, &tree->list);
+		rfence = xe_range_fence_tree_iter_next(rfence, 0, U64_MAX);
+	}
+
+	while (retry)
+		retry = __xe_range_fence_tree_cleanup(tree);
+}
+
+/**
+ * xe_range_fence_tree_init() - Init range fence tree
+ * @tree: range fence tree
+ */
+void xe_range_fence_tree_init(struct xe_range_fence_tree *tree)
+{
+	memset(tree, 0, sizeof(*tree));
+}
+
+/**
+ * xe_range_fence_tree_fini() - Fini range fence tree
+ * @tree: range fence tree
+ */
+void xe_range_fence_tree_fini(struct xe_range_fence_tree *tree)
+{
+	xe_range_fence_tree_remove_all(tree);
+	XE_WARN_ON(!RB_EMPTY_ROOT(&tree->root.rb_root));
+}
+
+/**
+ * xe_range_fence_tree_first() - range fence tree iterator first
+ * @tree: range fence tree
+ * @start: start address of range fence
+ * @last: last address of range fence
+ *
+ * Return: first range fence found in range or NULL
+ */
+struct xe_range_fence *
+xe_range_fence_tree_first(struct xe_range_fence_tree *tree, u64 start,
+			  u64 last)
+{
+	return xe_range_fence_tree_iter_first(&tree->root, start, last);
+}
+
+/**
+ * xe_range_fence_tree_next() - range fence tree iterator next
+ * @rfence: current range fence
+ * @start: start address of range fence
+ * @last: last address of range fence
+ *
+ * Return: next range fence found in range or NULL
+ */
+struct xe_range_fence *
+xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last)
+{
+	return xe_range_fence_tree_iter_next(rfence, start, last);
+}
+
+const struct xe_range_fence_ops xe_range_fence_kfree_ops = {
+	.free = (void (*)(struct xe_range_fence *rfence)) kfree,
+};
diff --git a/drivers/gpu/drm/xe/xe_range_fence.h b/drivers/gpu/drm/xe/xe_range_fence.h
new file mode 100644
index 000000000000..edd58b34f5c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_range_fence.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_RANGE_FENCE_H_
+#define _XE_RANGE_FENCE_H_
+
+#include <linux/dma-fence.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct xe_range_fence_tree;
+struct xe_range_fence;
+
+/** struct xe_range_fence_ops - XE range fence ops */
+struct xe_range_fence_ops {
+	/** @free: free range fence op */
+	void (*free)(struct xe_range_fence *rfence);
+};
+
+/** struct xe_range_fence - XE range fence (address conflict tracking) */
+struct xe_range_fence {
+	/** @rb: RB tree node inserted into interval tree */
+	struct rb_node rb;
+	/** @start: start address of range fence is interval tree */
+	u64 start;
+	/** @last: last address (inclusive) of range fence is interval tree */
+	u64 last;
+	/** @__subtree_last: interval tree internal usage */
+	u64 __subtree_last;
+	/**
+	 * @fence: fence signals address in range fence no longer has conflict
+	 */
+	struct dma_fence *fence;
+	/** @tree: interval tree which range fence belongs to */
+	struct xe_range_fence_tree *tree;
+	/**
+	 * @cb: callback when fence signals to remove range fence free from interval tree
+	 */
+	struct dma_fence_cb cb;
+	/** @link: used to defer free of range fence to non-irq context */
+	struct llist_node link;
+	/** @ops: range fence ops */
+	const struct xe_range_fence_ops *ops;
+};
+
+/** struct xe_range_fence_tree - interval tree to store range fences */
+struct xe_range_fence_tree {
+	/** @root: interval tree root */
+	struct rb_root_cached root;
+	/** @list: list of pending range fences to be freed */
+	struct llist_head list;
+};
+
+extern const struct xe_range_fence_ops xe_range_fence_kfree_ops;
+
+struct xe_range_fence *
+xe_range_fence_tree_first(struct xe_range_fence_tree *tree, u64 start,
+			  u64 last);
+
+struct xe_range_fence *
+xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last);
+
+void xe_range_fence_tree_init(struct xe_range_fence_tree *tree);
+
+void xe_range_fence_tree_fini(struct xe_range_fence_tree *tree);
+
+int xe_range_fence_insert(struct xe_range_fence_tree *tree,
+			  struct xe_range_fence *rfence,
+			  const struct xe_range_fence_ops *ops,
+			  u64 start, u64 end,
+			  struct dma_fence *fence);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
new file mode 100644
index 000000000000..87adefb56024
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_reg_sr.h"
+
+#include <kunit/visibility.h>
+#include <linux/align.h>
+#include <linux/string_helpers.h>
+#include <linux/xarray.h>
+
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_printk.h"
+#include "xe_hw_engine_types.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_reg_whitelist.h"
+#include "xe_rtp_types.h"
+
+#define XE_REG_SR_GROW_STEP_DEFAULT	16
+
+static void reg_sr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_reg_sr *sr = arg;
+
+	xa_destroy(&sr->xa);
+	kfree(sr->pool.arr);
+	memset(&sr->pool, 0, sizeof(sr->pool));
+}
+
+int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe)
+{
+	xa_init(&sr->xa);
+	memset(&sr->pool, 0, sizeof(sr->pool));
+	sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT;
+	sr->name = name;
+
+	return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init);
+
+static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr)
+{
+	if (sr->pool.used == sr->pool.allocated) {
+		struct xe_reg_sr_entry *arr;
+
+		arr = krealloc_array(sr->pool.arr,
+				     ALIGN(sr->pool.allocated + 1, sr->pool.grow_step),
+				     sizeof(*arr), GFP_KERNEL);
+		if (!arr)
+			return NULL;
+
+		sr->pool.arr = arr;
+		sr->pool.allocated += sr->pool.grow_step;
+	}
+
+	return &sr->pool.arr[sr->pool.used++];
+}
+
+static bool compatible_entries(const struct xe_reg_sr_entry *e1,
+			       const struct xe_reg_sr_entry *e2)
+{
+	/*
+	 * Don't allow overwriting values: clr_bits/set_bits should be disjoint
+	 * when operating in the same register
+	 */
+	if (e1->clr_bits & e2->clr_bits || e1->set_bits & e2->set_bits ||
+	    e1->clr_bits & e2->set_bits || e1->set_bits & e2->clr_bits)
+		return false;
+
+	if (e1->reg.raw != e2->reg.raw)
+		return false;
+
+	return true;
+}
+
+static void reg_sr_inc_error(struct xe_reg_sr *sr)
+{
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+	sr->errors++;
+#endif
+}
+
+int xe_reg_sr_add(struct xe_reg_sr *sr,
+		  const struct xe_reg_sr_entry *e,
+		  struct xe_gt *gt)
+{
+	unsigned long idx = e->reg.addr;
+	struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx);
+	int ret;
+
+	if (pentry) {
+		if (!compatible_entries(pentry, e)) {
+			ret = -EINVAL;
+			goto fail;
+		}
+
+		pentry->clr_bits |= e->clr_bits;
+		pentry->set_bits |= e->set_bits;
+		pentry->read_mask |= e->read_mask;
+
+		return 0;
+	}
+
+	pentry = alloc_entry(sr);
+	if (!pentry) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	*pentry = *e;
+	ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL));
+	if (ret)
+		goto fail;
+
+	return 0;
+
+fail:
+	xe_gt_err(gt,
+		  "discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n",
+		  idx, e->clr_bits, e->set_bits,
+		  str_yes_no(e->reg.masked),
+		  str_yes_no(e->reg.mcr),
+		  ret);
+	reg_sr_inc_error(sr);
+
+	return ret;
+}
+
+/*
+ * Convert back from encoded value to type-safe, only to be used when reg.mcr
+ * is true
+ */
+static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
+{
+	return (const struct xe_reg_mcr){.__reg.raw = reg.raw };
+}
+
+static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry)
+{
+	struct xe_reg reg = entry->reg;
+	struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
+	u32 val;
+
+	/*
+	 * If this is a masked register, need to set the upper 16 bits.
+	 * Set them to clr_bits since that is always a superset of the bits
+	 * being modified.
+	 *
+	 * When it's not masked, we have to read it from hardware, unless we are
+	 * supposed to set all bits.
+	 */
+	if (reg.masked)
+		val = entry->clr_bits << 16;
+	else if (entry->clr_bits + 1)
+		val = (reg.mcr ?
+		       xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
+		       xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+	else
+		val = 0;
+
+	/*
+	 * TODO: add selftest to validate all tables, regardless of platform:
+	 *   - Masked registers can't have set_bits with upper bits set
+	 *   - set_bits must be contained in clr_bits
+	 */
+	val |= entry->set_bits;
+
+	xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
+
+	if (entry->reg.mcr)
+		xe_gt_mcr_multicast_write(gt, reg_mcr, val);
+	else
+		xe_mmio_write32(gt, reg, val);
+}
+
+void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
+{
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+	int err;
+
+	if (xa_empty(&sr->xa))
+		return;
+
+	xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name);
+
+	err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_force_wake;
+
+	xa_for_each(&sr->xa, reg, entry)
+		apply_one_mmio(gt, entry);
+
+	err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+
+	return;
+
+err_force_wake:
+	xe_gt_err(gt, "Failed to apply, err=%d\n", err);
+}
+
+void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
+{
+	struct xe_reg_sr *sr = &hwe->reg_whitelist;
+	struct xe_gt *gt = hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_reg_sr_entry *entry;
+	struct drm_printer p;
+	u32 mmio_base = hwe->mmio_base;
+	unsigned long reg;
+	unsigned int slot = 0;
+	int err;
+
+	if (xa_empty(&sr->xa))
+		return;
+
+	drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
+
+	err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	if (err)
+		goto err_force_wake;
+
+	p = drm_debug_printer(KBUILD_MODNAME);
+	xa_for_each(&sr->xa, reg, entry) {
+		if (slot == RING_MAX_NONPRIV_SLOTS) {
+			xe_gt_err(gt,
+				  "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n",
+				  hwe->name, RING_MAX_NONPRIV_SLOTS);
+			break;
+		}
+
+		xe_reg_whitelist_print_entry(&p, 0, reg, entry);
+		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot),
+				reg | entry->set_bits);
+		slot++;
+	}
+
+	/* And clear the rest just in case of garbage */
+	for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) {
+		u32 addr = RING_NOPID(mmio_base).addr;
+
+		xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
+	}
+
+	err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+	XE_WARN_ON(err);
+
+	return;
+
+err_force_wake:
+	drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
+}
+
+/**
+ * xe_reg_sr_dump - print all save/restore entries
+ * @sr: Save/restore entries
+ * @p: DRM printer
+ */
+void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p)
+{
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+
+	if (!sr->name || xa_empty(&sr->xa))
+		return;
+
+	drm_printf(p, "%s\n", sr->name);
+	xa_for_each(&sr->xa, reg, entry)
+		drm_printf(p, "\tREG[0x%lx] clr=0x%08x set=0x%08x masked=%s mcr=%s\n",
+			   reg, entry->clr_bits, entry->set_bits,
+			   str_yes_no(entry->reg.masked),
+			   str_yes_no(entry->reg.mcr));
+}
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h
new file mode 100644
index 000000000000..e3197c33afe2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_REG_SR_
+#define _XE_REG_SR_
+
+#include "xe_reg_sr_types.h"
+
+/*
+ * Reg save/restore bookkeeping
+ */
+
+struct xe_device;
+struct xe_gt;
+struct xe_hw_engine;
+struct drm_printer;
+
+int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe);
+void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p);
+
+int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e,
+		  struct xe_gt *gt);
+void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt);
+void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h
new file mode 100644
index 000000000000..ad48a52b824a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_REG_SR_TYPES_
+#define _XE_REG_SR_TYPES_
+
+#include <linux/types.h>
+#include <linux/xarray.h>
+
+#include "regs/xe_reg_defs.h"
+
+struct xe_reg_sr_entry {
+	struct xe_reg	reg;
+	u32		clr_bits;
+	u32		set_bits;
+	/* Mask for bits to consider when reading value back */
+	u32		read_mask;
+};
+
+struct xe_reg_sr {
+	struct {
+		struct xe_reg_sr_entry *arr;
+		unsigned int used;
+		unsigned int allocated;
+		unsigned int grow_step;
+	} pool;
+	struct xarray xa;
+	const char *name;
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+	unsigned int errors;
+#endif
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
new file mode 100644
index 000000000000..e66ae1bdaf9c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_reg_whitelist.h"
+
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "xe_gt_types.h"
+#include "xe_platform_types.h"
+#include "xe_rtp.h"
+
+#undef XE_REG_MCR
+#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
+
+static bool match_not_render(const struct xe_gt *gt,
+			     const struct xe_hw_engine *hwe)
+{
+	return hwe->class != XE_ENGINE_CLASS_RENDER;
+}
+
+static const struct xe_rtp_entry_sr register_whitelist[] = {
+	{ XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(WHITELIST(PS_INVOCATION_COUNT,
+				   RING_FORCE_TO_NONPRIV_ACCESS_RD |
+				   RING_FORCE_TO_NONPRIV_RANGE_4))
+	},
+	{ XE_RTP_NAME("1508744258, 14012131227, 1808121037"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(WHITELIST(COMMON_SLICE_CHICKEN1, 0))
+	},
+	{ XE_RTP_NAME("1806527549"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(WHITELIST(HIZ_CHICKEN, 0))
+	},
+	{ XE_RTP_NAME("allow_read_ctx_timestamp"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260), FUNC(match_not_render)),
+	  XE_RTP_ACTIONS(WHITELIST(RING_CTX_TIMESTAMP(0),
+				RING_FORCE_TO_NONPRIV_ACCESS_RD,
+				XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+	{ XE_RTP_NAME("16014440446"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_RTP_ACTIONS(WHITELIST(XE_REG(0x4400),
+				   RING_FORCE_TO_NONPRIV_DENY |
+				   RING_FORCE_TO_NONPRIV_RANGE_64),
+			 WHITELIST(XE_REG(0x4500),
+				   RING_FORCE_TO_NONPRIV_DENY |
+				   RING_FORCE_TO_NONPRIV_RANGE_64))
+	},
+	{ XE_RTP_NAME("16017236439"),
+	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COPY)),
+	  XE_RTP_ACTIONS(WHITELIST(BCS_SWCTRL(0),
+				   RING_FORCE_TO_NONPRIV_DENY,
+				   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+	{}
+};
+
+/**
+ * xe_reg_whitelist_process_engine - process table of registers to whitelist
+ * @hwe: engine instance to process whitelist for
+ *
+ * Process wwhitelist table for this platform, saving in @hwe all the
+ * registers that need to be whitelisted by the hardware so they can be accessed
+ * by userspace.
+ */
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist);
+}
+
+/**
+ * xe_reg_whitelist_print_entry - print one whitelist entry
+ * @p: DRM printer
+ * @indent: indent level
+ * @reg: register allowed/denied
+ * @entry: save-restore entry
+ *
+ * Print details about the entry added to allow/deny access
+ */
+void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
+				  u32 reg, struct xe_reg_sr_entry *entry)
+{
+	u32 val = entry->set_bits;
+	const char *access_str = "(invalid)";
+	unsigned int range_bit = 2;
+	u32 range_start, range_end;
+	bool deny;
+
+	deny = val & RING_FORCE_TO_NONPRIV_DENY;
+
+	switch (val & RING_FORCE_TO_NONPRIV_RANGE_MASK) {
+	case RING_FORCE_TO_NONPRIV_RANGE_4:
+		range_bit = 4;
+		break;
+	case RING_FORCE_TO_NONPRIV_RANGE_16:
+		range_bit = 6;
+		break;
+	case RING_FORCE_TO_NONPRIV_RANGE_64:
+		range_bit = 8;
+		break;
+	}
+
+	range_start = reg & REG_GENMASK(25, range_bit);
+	range_end = range_start | REG_GENMASK(range_bit, 0);
+
+	switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) {
+	case RING_FORCE_TO_NONPRIV_ACCESS_RW:
+		access_str = "rw";
+		break;
+	case RING_FORCE_TO_NONPRIV_ACCESS_RD:
+		access_str = "read";
+		break;
+	case RING_FORCE_TO_NONPRIV_ACCESS_WR:
+		access_str = "write";
+		break;
+	}
+
+	drm_printf_indent(p, indent, "REG[0x%x-0x%x]: %s %s access\n",
+			  range_start, range_end,
+			  deny ? "deny" : "allow",
+			  access_str);
+}
+
+/**
+ * xe_reg_whitelist_dump - print all whitelist entries
+ * @sr: Save/restore entries
+ * @p: DRM printer
+ */
+void xe_reg_whitelist_dump(struct xe_reg_sr *sr, struct drm_printer *p)
+{
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+
+	if (!sr->name || xa_empty(&sr->xa))
+		return;
+
+	drm_printf(p, "%s\n", sr->name);
+	xa_for_each(&sr->xa, reg, entry)
+		xe_reg_whitelist_print_entry(p, 1, reg, entry);
+}
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h
new file mode 100644
index 000000000000..69b121d377da
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_REG_WHITELIST_
+#define _XE_REG_WHITELIST_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_hw_engine;
+struct xe_reg_sr;
+struct xe_reg_sr_entry;
+
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
+
+void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
+				  u32 reg, struct xe_reg_sr_entry *entry);
+
+void xe_reg_whitelist_dump(struct xe_reg_sr *sr, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
new file mode 100644
index 000000000000..0a306963aa8e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _XE_RES_CURSOR_H_
+#define _XE_RES_CURSOR_H_
+
+#include <linux/scatterlist.h>
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_resource.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_macros.h"
+#include "xe_ttm_vram_mgr.h"
+
+/* state back for walking over vram_mgr, stolen_mgr, and gtt_mgr allocations */
+struct xe_res_cursor {
+	u64 start;
+	u64 size;
+	u64 remaining;
+	void *node;
+	u32 mem_type;
+	struct scatterlist *sgl;
+	struct drm_buddy *mm;
+};
+
+static struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res)
+{
+	struct ttm_resource_manager *mgr;
+
+	mgr = ttm_manager_type(res->bo->bdev, res->mem_type);
+	return &to_xe_ttm_vram_mgr(mgr)->mm;
+}
+
+/**
+ * xe_res_first - initialize a xe_res_cursor
+ *
+ * @res: TTM resource object to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over the range of allocations between @start and @size.
+ */
+static inline void xe_res_first(struct ttm_resource *res,
+				u64 start, u64 size,
+				struct xe_res_cursor *cur)
+{
+	cur->sgl = NULL;
+	if (!res)
+		goto fallback;
+
+	XE_WARN_ON(start + size > res->size);
+
+	cur->mem_type = res->mem_type;
+
+	switch (cur->mem_type) {
+	case XE_PL_STOLEN:
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1: {
+		struct drm_buddy_block *block;
+		struct list_head *head, *next;
+		struct drm_buddy *mm = xe_res_get_buddy(res);
+
+		head = &to_xe_ttm_vram_mgr_resource(res)->blocks;
+
+		block = list_first_entry_or_null(head,
+						 struct drm_buddy_block,
+						 link);
+		if (!block)
+			goto fallback;
+
+		while (start >= drm_buddy_block_size(mm, block)) {
+			start -= drm_buddy_block_size(mm, block);
+
+			next = block->link.next;
+			if (next != head)
+				block = list_entry(next, struct drm_buddy_block,
+						   link);
+		}
+
+		cur->mm = mm;
+		cur->start = drm_buddy_block_offset(block) + start;
+		cur->size = min(drm_buddy_block_size(mm, block) - start,
+				size);
+		cur->remaining = size;
+		cur->node = block;
+		break;
+	}
+	default:
+		goto fallback;
+	}
+
+	return;
+
+fallback:
+	cur->start = start;
+	cur->size = size;
+	cur->remaining = size;
+	cur->node = NULL;
+	cur->mem_type = XE_PL_TT;
+	XE_WARN_ON(res && start + size > res->size);
+}
+
+static inline void __xe_res_sg_next(struct xe_res_cursor *cur)
+{
+	struct scatterlist *sgl = cur->sgl;
+	u64 start = cur->start;
+
+	while (start >= sg_dma_len(sgl)) {
+		start -= sg_dma_len(sgl);
+		sgl = sg_next(sgl);
+		XE_WARN_ON(!sgl);
+	}
+
+	cur->start = start;
+	cur->size = sg_dma_len(sgl) - start;
+	cur->sgl = sgl;
+}
+
+/**
+ * xe_res_first_sg - initialize a xe_res_cursor with a scatter gather table
+ *
+ * @sg: scatter gather table to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over the range of allocations between @start and @size.
+ */
+static inline void xe_res_first_sg(const struct sg_table *sg,
+				   u64 start, u64 size,
+				   struct xe_res_cursor *cur)
+{
+	XE_WARN_ON(!sg);
+	XE_WARN_ON(!IS_ALIGNED(start, PAGE_SIZE) ||
+		   !IS_ALIGNED(size, PAGE_SIZE));
+	cur->node = NULL;
+	cur->start = start;
+	cur->remaining = size;
+	cur->size = 0;
+	cur->sgl = sg->sgl;
+	cur->mem_type = XE_PL_TT;
+	__xe_res_sg_next(cur);
+}
+
+/**
+ * xe_res_next - advance the cursor
+ *
+ * @cur: the cursor to advance
+ * @size: number of bytes to move forward
+ *
+ * Move the cursor @size bytes forwrad, walking to the next node if necessary.
+ */
+static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
+{
+	struct drm_buddy_block *block;
+	struct list_head *next;
+	u64 start;
+
+	XE_WARN_ON(size > cur->remaining);
+
+	cur->remaining -= size;
+	if (!cur->remaining)
+		return;
+
+	if (cur->size > size) {
+		cur->size -= size;
+		cur->start += size;
+		return;
+	}
+
+	if (cur->sgl) {
+		cur->start += size;
+		__xe_res_sg_next(cur);
+		return;
+	}
+
+	switch (cur->mem_type) {
+	case XE_PL_STOLEN:
+	case XE_PL_VRAM0:
+	case XE_PL_VRAM1:
+		start = size - cur->size;
+		block = cur->node;
+
+		next = block->link.next;
+		block = list_entry(next, struct drm_buddy_block, link);
+
+
+		while (start >= drm_buddy_block_size(cur->mm, block)) {
+			start -= drm_buddy_block_size(cur->mm, block);
+
+			next = block->link.next;
+			block = list_entry(next, struct drm_buddy_block, link);
+		}
+
+		cur->start = drm_buddy_block_offset(block) + start;
+		cur->size = min(drm_buddy_block_size(cur->mm, block) - start,
+				cur->remaining);
+		cur->node = block;
+		break;
+	default:
+		return;
+	}
+}
+
+/**
+ * xe_res_dma - return dma address of cursor at current position
+ *
+ * @cur: the cursor to return the dma address from
+ */
+static inline u64 xe_res_dma(const struct xe_res_cursor *cur)
+{
+	return cur->sgl ? sg_dma_address(cur->sgl) + cur->start : cur->start;
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
new file mode 100644
index 000000000000..1e4c06eacd98
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_ring_ops.h"
+
+#include "generated/xe_wa_oob.h"
+#include "instructions/xe_mi_commands.h"
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gpu_commands.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_lrc_layout.h"
+#include "xe_exec_queue_types.h"
+#include "xe_gt.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_sched_job.h"
+#include "xe_vm_types.h"
+#include "xe_vm.h"
+#include "xe_wa.h"
+
+/*
+ * 3D-related flags that can't be set on _engines_ that lack access to the 3D
+ * pipeline (i.e., CCS engines).
+ */
+#define PIPE_CONTROL_3D_ENGINE_FLAGS (\
+		PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \
+		PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
+		PIPE_CONTROL_TILE_CACHE_FLUSH | \
+		PIPE_CONTROL_DEPTH_STALL | \
+		PIPE_CONTROL_STALL_AT_SCOREBOARD | \
+		PIPE_CONTROL_PSD_SYNC | \
+		PIPE_CONTROL_AMFS_FLUSH | \
+		PIPE_CONTROL_VF_CACHE_INVALIDATE | \
+		PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET)
+
+/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */
+#define PIPE_CONTROL_3D_ARCH_FLAGS ( \
+		PIPE_CONTROL_3D_ENGINE_FLAGS | \
+		PIPE_CONTROL_INDIRECT_STATE_DISABLE | \
+		PIPE_CONTROL_FLUSH_ENABLE | \
+		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
+		PIPE_CONTROL_DC_FLUSH_ENABLE)
+
+static u32 preparser_disable(bool state)
+{
+	return MI_ARB_CHECK | BIT(8) | state;
+}
+
+static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg,
+			      u32 *dw, int i)
+{
+	dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_MMIO_REMAP_EN;
+	dw[i++] = reg.addr + gt->mmio.adj_offset;
+	dw[i++] = AUX_INV;
+	dw[i++] = MI_NOOP;
+
+	return i;
+}
+
+static int emit_user_interrupt(u32 *dw, int i)
+{
+	dw[i++] = MI_USER_INTERRUPT;
+	dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	dw[i++] = MI_ARB_CHECK;
+
+	return i;
+}
+
+static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
+{
+	dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
+	dw[i++] = addr;
+	dw[i++] = 0;
+	dw[i++] = value;
+
+	return i;
+}
+
+static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
+			       u32 *dw, int i)
+{
+	dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
+		(invalidate_tlb ? MI_INVALIDATE_TLB : 0);
+	dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
+	dw[i++] = 0;
+	dw[i++] = value;
+
+	return i;
+}
+
+static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
+{
+	dw[i++] = MI_BATCH_BUFFER_START | ppgtt_flag | XE_INSTR_NUM_DW(3);
+	dw[i++] = lower_32_bits(batch_addr);
+	dw[i++] = upper_32_bits(batch_addr);
+
+	return i;
+}
+
+static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
+{
+	dw[i] = MI_FLUSH_DW;
+	dw[i] |= flag;
+	dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
+		MI_FLUSH_DW_STORE_INDEX;
+
+	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+	dw[i++] = 0;
+	dw[i++] = ~0U;
+
+	return i;
+}
+
+static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
+				int i)
+{
+	u32 flags = PIPE_CONTROL_CS_STALL |
+		PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
+		PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
+		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+		PIPE_CONTROL_VF_CACHE_INVALIDATE |
+		PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+		PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+		PIPE_CONTROL_QW_WRITE |
+		PIPE_CONTROL_STORE_DATA_INDEX;
+
+	if (invalidate_tlb)
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+
+	flags &= ~mask_flags;
+
+	dw[i++] = GFX_OP_PIPE_CONTROL(6);
+	dw[i++] = flags;
+	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR;
+	dw[i++] = 0;
+	dw[i++] = 0;
+	dw[i++] = 0;
+
+	return i;
+}
+
+static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
+				       u32 *dw, int i)
+{
+	dw[i++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(1);
+	dw[i++] = lower_32_bits(addr);
+	dw[i++] = upper_32_bits(addr);
+	dw[i++] = lower_32_bits(value);
+	dw[i++] = upper_32_bits(value);
+
+	return i;
+}
+
+static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
+{
+	struct xe_gt *gt = job->q->gt;
+	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
+	u32 flags;
+
+	flags = (PIPE_CONTROL_CS_STALL |
+		 PIPE_CONTROL_TILE_CACHE_FLUSH |
+		 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		 PIPE_CONTROL_DC_FLUSH_ENABLE |
+		 PIPE_CONTROL_FLUSH_ENABLE);
+
+	if (XE_WA(gt, 1409600907))
+		flags |= PIPE_CONTROL_DEPTH_STALL;
+
+	if (lacks_render)
+		flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+	else if (job->q->class == XE_ENGINE_CLASS_COMPUTE)
+		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
+
+	dw[i++] = GFX_OP_PIPE_CONTROL(6) | PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+	dw[i++] = flags;
+	dw[i++] = 0;
+	dw[i++] = 0;
+	dw[i++] = 0;
+	dw[i++] = 0;
+
+	return i;
+}
+
+static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int i)
+{
+	if (hwe->class != XE_ENGINE_CLASS_RENDER)
+		return i;
+
+	if (XE_WA(hwe->gt, 16020292621)) {
+		dw[i++] = GFX_OP_PIPE_CONTROL(6);
+		dw[i++] = PIPE_CONTROL_LRI_POST_SYNC;
+		dw[i++] = RING_NOPID(hwe->mmio_base).addr;
+		dw[i++] = 0;
+		dw[i++] = 0;
+		dw[i++] = 0;
+	}
+
+	return i;
+}
+
+static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
+			      int i)
+{
+	dw[i++] = GFX_OP_PIPE_CONTROL(6);
+	dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL :
+		   PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) |
+		PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE;
+	dw[i++] = addr;
+	dw[i++] = 0;
+	dw[i++] = value;
+	dw[i++] = 0; /* We're thrashing one extra dword. */
+
+	return i;
+}
+
+static u32 get_ppgtt_flag(struct xe_sched_job *job)
+{
+	return job->q->vm ? BIT(8) : 0;
+}
+
+/* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */
+static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc,
+				    u64 batch_addr, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+	u32 ppgtt_flag = get_ppgtt_flag(job);
+	struct xe_vm *vm = job->q->vm;
+	struct xe_gt *gt = job->q->gt;
+
+	if (vm && vm->batch_invalidate_tlb) {
+		dw[i++] = preparser_disable(true);
+		i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, true, dw, i);
+		dw[i++] = preparser_disable(false);
+	} else {
+		i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, dw, i);
+	}
+
+	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+	if (job->user_fence.used)
+		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+						job->user_fence.value,
+						dw, i);
+
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
+
+	i = emit_user_interrupt(dw, i);
+
+	xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static bool has_aux_ccs(struct xe_device *xe)
+{
+	/*
+	 * PVC is a special case that has no compression of either type
+	 * (FlatCCS or AuxCCS).  Also, AuxCCS is no longer used from Xe2
+	 * onward, so any future platforms with no FlatCCS will not have
+	 * AuxCCS either.
+	 */
+	if (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC)
+		return false;
+
+	return !xe->info.has_flat_ccs;
+}
+
+static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
+				   u64 batch_addr, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+	u32 ppgtt_flag = get_ppgtt_flag(job);
+	struct xe_gt *gt = job->q->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
+	struct xe_vm *vm = job->q->vm;
+
+	dw[i++] = preparser_disable(true);
+
+	/* hsdes: 1809175790 */
+	if (has_aux_ccs(xe)) {
+		if (decode)
+			i = emit_aux_table_inv(gt, VD0_AUX_INV, dw, i);
+		else
+			i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
+	}
+
+	if (vm && vm->batch_invalidate_tlb)
+		i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, true, dw, i);
+
+	dw[i++] = preparser_disable(false);
+
+	if (!vm || !vm->batch_invalidate_tlb)
+		i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, dw, i);
+
+	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+	if (job->user_fence.used)
+		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+						job->user_fence.value,
+						dw, i);
+
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
+
+	i = emit_user_interrupt(dw, i);
+
+	xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
+					    struct xe_lrc *lrc,
+					    u64 batch_addr, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+	u32 ppgtt_flag = get_ppgtt_flag(job);
+	struct xe_gt *gt = job->q->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
+	struct xe_vm *vm = job->q->vm;
+	u32 mask_flags = 0;
+
+	dw[i++] = preparser_disable(true);
+	if (lacks_render)
+		mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
+	else if (job->q->class == XE_ENGINE_CLASS_COMPUTE)
+		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
+
+	/* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
+	i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i);
+
+	/* hsdes: 1809175790 */
+	if (has_aux_ccs(xe))
+		i = emit_aux_table_inv(gt, CCS_AUX_INV, dw, i);
+
+	dw[i++] = preparser_disable(false);
+
+	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+				seqno, dw, i);
+
+	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+	i = emit_render_cache_flush(job, dw, i);
+
+	if (job->user_fence.used)
+		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+						job->user_fence.value,
+						dw, i);
+
+	i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i);
+
+	i = emit_user_interrupt(dw, i);
+
+	i = emit_pipe_control_to_ring_end(job->q->hwe, dw, i);
+
+	xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void emit_migration_job_gen12(struct xe_sched_job *job,
+				     struct xe_lrc *lrc, u32 seqno)
+{
+	u32 dw[MAX_JOB_SIZE_DW], i = 0;
+
+	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+				seqno, dw, i);
+
+	dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */
+
+	i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i);
+
+	/* XXX: Do we need this? Leaving for now. */
+	dw[i++] = preparser_disable(true);
+	i = emit_flush_invalidate(0, dw, i);
+	dw[i++] = preparser_disable(false);
+
+	i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i);
+
+	dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags |
+		MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW;
+	dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT;
+	dw[i++] = 0;
+	dw[i++] = seqno; /* value */
+
+	i = emit_user_interrupt(dw, i);
+
+	xe_gt_assert(job->q->gt, i <= MAX_JOB_SIZE_DW);
+
+	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void emit_job_gen12_gsc(struct xe_sched_job *job)
+{
+	struct xe_gt *gt = job->q->gt;
+
+	xe_gt_assert(gt, job->q->width <= 1); /* no parallel submission for GSCCS */
+
+	__emit_job_gen12_simple(job, job->q->lrc,
+				job->batch_addr[0],
+				xe_sched_job_seqno(job));
+}
+
+static void emit_job_gen12_copy(struct xe_sched_job *job)
+{
+	int i;
+
+	if (xe_sched_job_is_migration(job->q)) {
+		emit_migration_job_gen12(job, job->q->lrc,
+					 xe_sched_job_seqno(job));
+		return;
+	}
+
+	for (i = 0; i < job->q->width; ++i)
+		__emit_job_gen12_simple(job, job->q->lrc + i,
+				        job->batch_addr[i],
+				        xe_sched_job_seqno(job));
+}
+
+static void emit_job_gen12_video(struct xe_sched_job *job)
+{
+	int i;
+
+	/* FIXME: Not doing parallel handshake for now */
+	for (i = 0; i < job->q->width; ++i)
+		__emit_job_gen12_video(job, job->q->lrc + i,
+				       job->batch_addr[i],
+				       xe_sched_job_seqno(job));
+}
+
+static void emit_job_gen12_render_compute(struct xe_sched_job *job)
+{
+	int i;
+
+	for (i = 0; i < job->q->width; ++i)
+		__emit_job_gen12_render_compute(job, job->q->lrc + i,
+						job->batch_addr[i],
+						xe_sched_job_seqno(job));
+}
+
+static const struct xe_ring_ops ring_ops_gen12_gsc = {
+	.emit_job = emit_job_gen12_gsc,
+};
+
+static const struct xe_ring_ops ring_ops_gen12_copy = {
+	.emit_job = emit_job_gen12_copy,
+};
+
+static const struct xe_ring_ops ring_ops_gen12_video = {
+	.emit_job = emit_job_gen12_video,
+};
+
+static const struct xe_ring_ops ring_ops_gen12_render_compute = {
+	.emit_job = emit_job_gen12_render_compute,
+};
+
+const struct xe_ring_ops *
+xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class)
+{
+	switch (class) {
+	case XE_ENGINE_CLASS_OTHER:
+		return &ring_ops_gen12_gsc;
+	case XE_ENGINE_CLASS_COPY:
+		return &ring_ops_gen12_copy;
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		return &ring_ops_gen12_video;
+	case XE_ENGINE_CLASS_RENDER:
+	case XE_ENGINE_CLASS_COMPUTE:
+		return &ring_ops_gen12_render_compute;
+	default:
+		return NULL;
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.h b/drivers/gpu/drm/xe/xe_ring_ops.h
new file mode 100644
index 000000000000..e942735d76a6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RING_OPS_H_
+#define _XE_RING_OPS_H_
+
+#include "xe_hw_engine_types.h"
+#include "xe_ring_ops_types.h"
+
+struct xe_gt;
+
+const struct xe_ring_ops *
+xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h
new file mode 100644
index 000000000000..1ae56e2ee7b4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RING_OPS_TYPES_H_
+#define _XE_RING_OPS_TYPES_H_
+
+struct xe_sched_job;
+
+#define MAX_JOB_SIZE_DW 48
+#define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4)
+
+/**
+ * struct xe_ring_ops - Ring operations
+ */
+struct xe_ring_ops {
+	/** @emit_job: Write job to ring */
+	void (*emit_job)(struct xe_sched_job *job);
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
new file mode 100644
index 000000000000..fb44cc7521d8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_rtp.h"
+
+#include <kunit/visibility.h>
+
+#include <drm/xe_drm.h>
+
+#include "xe_gt.h"
+#include "xe_gt_topology.h"
+#include "xe_macros.h"
+#include "xe_reg_sr.h"
+
+/**
+ * DOC: Register Table Processing
+ *
+ * Internal infrastructure to define how registers should be updated based on
+ * rules and actions. This can be used to define tables with multiple entries
+ * (one per register) that will be walked over at some point in time to apply
+ * the values to the registers that have matching rules.
+ */
+
+static bool has_samedia(const struct xe_device *xe)
+{
+	return xe->info.media_verx100 >= 1300;
+}
+
+static bool rule_matches(const struct xe_device *xe,
+			 struct xe_gt *gt,
+			 struct xe_hw_engine *hwe,
+			 const struct xe_rtp_rule *rules,
+			 unsigned int n_rules)
+{
+	const struct xe_rtp_rule *r;
+	unsigned int i;
+	bool match;
+
+	for (r = rules, i = 0; i < n_rules; r = &rules[++i]) {
+		switch (r->match_type) {
+		case XE_RTP_MATCH_PLATFORM:
+			match = xe->info.platform == r->platform;
+			break;
+		case XE_RTP_MATCH_SUBPLATFORM:
+			match = xe->info.platform == r->platform &&
+				xe->info.subplatform == r->subplatform;
+			break;
+		case XE_RTP_MATCH_GRAPHICS_VERSION:
+			match = xe->info.graphics_verx100 == r->ver_start &&
+				(!has_samedia(xe) || !xe_gt_is_media_type(gt));
+			break;
+		case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE:
+			match = xe->info.graphics_verx100 >= r->ver_start &&
+				xe->info.graphics_verx100 <= r->ver_end &&
+				(!has_samedia(xe) || !xe_gt_is_media_type(gt));
+			break;
+		case XE_RTP_MATCH_GRAPHICS_STEP:
+			match = xe->info.step.graphics >= r->step_start &&
+				xe->info.step.graphics < r->step_end &&
+				(!has_samedia(xe) || !xe_gt_is_media_type(gt));
+			break;
+		case XE_RTP_MATCH_MEDIA_VERSION:
+			match = xe->info.media_verx100 == r->ver_start &&
+				(!has_samedia(xe) || xe_gt_is_media_type(gt));
+			break;
+		case XE_RTP_MATCH_MEDIA_VERSION_RANGE:
+			match = xe->info.media_verx100 >= r->ver_start &&
+				xe->info.media_verx100 <= r->ver_end &&
+				(!has_samedia(xe) || xe_gt_is_media_type(gt));
+			break;
+		case XE_RTP_MATCH_MEDIA_STEP:
+			match = xe->info.step.media >= r->step_start &&
+				xe->info.step.media < r->step_end &&
+				(!has_samedia(xe) || xe_gt_is_media_type(gt));
+			break;
+		case XE_RTP_MATCH_INTEGRATED:
+			match = !xe->info.is_dgfx;
+			break;
+		case XE_RTP_MATCH_DISCRETE:
+			match = xe->info.is_dgfx;
+			break;
+		case XE_RTP_MATCH_ENGINE_CLASS:
+			if (drm_WARN_ON(&xe->drm, !hwe))
+				return false;
+
+			match = hwe->class == r->engine_class;
+			break;
+		case XE_RTP_MATCH_NOT_ENGINE_CLASS:
+			if (drm_WARN_ON(&xe->drm, !hwe))
+				return false;
+
+			match = hwe->class != r->engine_class;
+			break;
+		case XE_RTP_MATCH_FUNC:
+			match = r->match_func(gt, hwe);
+			break;
+		default:
+			drm_warn(&xe->drm, "Invalid RTP match %u\n",
+				 r->match_type);
+			match = false;
+		}
+
+		if (!match)
+			return false;
+	}
+
+	return true;
+}
+
+static void rtp_add_sr_entry(const struct xe_rtp_action *action,
+			     struct xe_gt *gt,
+			     u32 mmio_base,
+			     struct xe_reg_sr *sr)
+{
+	struct xe_reg_sr_entry sr_entry = {
+		.reg = action->reg,
+		.clr_bits = action->clr_bits,
+		.set_bits = action->set_bits,
+		.read_mask = action->read_mask,
+	};
+
+	sr_entry.reg.addr += mmio_base;
+	xe_reg_sr_add(sr, &sr_entry, gt);
+}
+
+static bool rtp_process_one_sr(const struct xe_rtp_entry_sr *entry,
+			       struct xe_device *xe, struct xe_gt *gt,
+			       struct xe_hw_engine *hwe, struct xe_reg_sr *sr)
+{
+	const struct xe_rtp_action *action;
+	u32 mmio_base;
+	unsigned int i;
+
+	if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules))
+		return false;
+
+	for (i = 0, action = &entry->actions[0]; i < entry->n_actions; action++, i++) {
+		if ((entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) ||
+		    (action->flags & XE_RTP_ACTION_FLAG_ENGINE_BASE))
+			mmio_base = hwe->mmio_base;
+		else
+			mmio_base = 0;
+
+		rtp_add_sr_entry(action, gt, mmio_base, sr);
+	}
+
+	return true;
+}
+
+static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
+			    struct xe_hw_engine **hwe,
+			    struct xe_gt **gt,
+			    struct xe_device **xe)
+{
+	switch (ctx->type) {
+	case XE_RTP_PROCESS_TYPE_GT:
+		*hwe = NULL;
+		*gt = ctx->gt;
+		*xe = gt_to_xe(*gt);
+		break;
+	case XE_RTP_PROCESS_TYPE_ENGINE:
+		*hwe = ctx->hwe;
+		*gt = (*hwe)->gt;
+		*xe = gt_to_xe(*gt);
+		break;
+	};
+}
+
+/**
+ * xe_rtp_process_ctx_enable_active_tracking - Enable tracking of active entries
+ *
+ * Set additional metadata to track what entries are considered "active", i.e.
+ * their rules match the condition. Bits are never cleared: entries with
+ * matching rules set the corresponding bit in the bitmap.
+ *
+ * @ctx: The context for processing the table
+ * @active_entries: bitmap to store the active entries
+ * @n_entries: number of entries to be processed
+ */
+void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
+					       unsigned long *active_entries,
+					       size_t n_entries)
+{
+	ctx->active_entries = active_entries;
+	ctx->n_entries = n_entries;
+}
+
+static void rtp_mark_active(struct xe_device *xe,
+			    struct xe_rtp_process_ctx *ctx,
+			    unsigned int first, unsigned int last)
+{
+	if (!ctx->active_entries)
+		return;
+
+	if (drm_WARN_ON(&xe->drm, last > ctx->n_entries))
+		return;
+
+	if (first == last)
+		bitmap_set(ctx->active_entries, first, 1);
+	else
+		bitmap_set(ctx->active_entries, first, last - first + 2);
+}
+
+/**
+ * xe_rtp_process_to_sr - Process all rtp @entries, adding the matching ones to
+ *                        the save-restore argument.
+ * @ctx: The context for processing the table, with one of device, gt or hwe
+ * @entries: Table with RTP definitions
+ * @sr: Save-restore struct where matching rules execute the action. This can be
+ *      viewed as the "coalesced view" of multiple the tables. The bits for each
+ *      register set are expected not to collide with previously added entries
+ *
+ * Walk the table pointed by @entries (with an empty sentinel) and add all
+ * entries with matching rules to @sr. If @hwe is not NULL, its mmio_base is
+ * used to calculate the right register offset
+ */
+void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
+			  const struct xe_rtp_entry_sr *entries,
+			  struct xe_reg_sr *sr)
+{
+	const struct xe_rtp_entry_sr *entry;
+	struct xe_hw_engine *hwe = NULL;
+	struct xe_gt *gt = NULL;
+	struct xe_device *xe = NULL;
+
+	rtp_get_context(ctx, &hwe, &gt, &xe);
+
+	for (entry = entries; entry && entry->name; entry++) {
+		bool match = false;
+
+		if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) {
+			struct xe_hw_engine *each_hwe;
+			enum xe_hw_engine_id id;
+
+			for_each_hw_engine(each_hwe, gt, id)
+				match |= rtp_process_one_sr(entry, xe, gt,
+							    each_hwe, sr);
+		} else {
+			match = rtp_process_one_sr(entry, xe, gt, hwe, sr);
+		}
+
+		if (match)
+			rtp_mark_active(xe, ctx, entry - entries,
+					entry - entries);
+	}
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr);
+
+/**
+ * xe_rtp_process - Process all rtp @entries, without running any action
+ * @ctx: The context for processing the table, with one of device, gt or hwe
+ * @entries: Table with RTP definitions
+ *
+ * Walk the table pointed by @entries (with an empty sentinel), executing the
+ * rules. A few differences from xe_rtp_process_to_sr():
+ *
+ * 1. There is no action associated with each entry since this uses
+ *    struct xe_rtp_entry. Its main use is for marking active workarounds via
+ *    xe_rtp_process_ctx_enable_active_tracking().
+ * 2. There is support for OR operations by having entries with no name.
+ */
+void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
+		    const struct xe_rtp_entry *entries)
+{
+	const struct xe_rtp_entry *entry, *first_entry;
+	struct xe_hw_engine *hwe;
+	struct xe_gt *gt;
+	struct xe_device *xe;
+
+	rtp_get_context(ctx, &hwe, &gt, &xe);
+
+	first_entry = entries;
+	if (drm_WARN_ON(&xe->drm, !first_entry->name))
+		return;
+
+	for (entry = entries; entry && entry->rules; entry++) {
+		if (entry->name)
+			first_entry = entry;
+
+		if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules))
+			continue;
+
+		/* Fast-forward entry, eliminating the OR'ed entries */
+		for (entry++; entry && entry->rules; entry++)
+			if (entry->name)
+				break;
+		entry--;
+
+		rtp_mark_active(xe, ctx, first_entry - entries,
+				entry - entries);
+	}
+}
+
+bool xe_rtp_match_even_instance(const struct xe_gt *gt,
+				const struct xe_hw_engine *hwe)
+{
+	return hwe->instance % 2 == 0;
+}
+
+bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
+					  const struct xe_hw_engine *hwe)
+{
+	u64 render_compute_mask = gt->info.engine_mask &
+		(XE_HW_ENGINE_CCS_MASK | XE_HW_ENGINE_RCS_MASK);
+
+	return render_compute_mask &&
+		hwe->engine_id == __ffs(render_compute_mask);
+}
+
+bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
+					 const struct xe_hw_engine *hwe)
+{
+	unsigned int dss_per_gslice = 4;
+	unsigned int dss;
+
+	if (drm_WARN(&gt_to_xe(gt)->drm, xe_dss_mask_empty(gt->fuse_topo.g_dss_mask),
+		     "Checking gslice for platform without geometry pipeline\n"))
+		return false;
+
+	dss = xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0);
+
+	return dss >= dss_per_gslice;
+}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
new file mode 100644
index 000000000000..c56fedd126e6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -0,0 +1,430 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RTP_
+#define _XE_RTP_
+
+#include <linux/types.h>
+#include <linux/xarray.h>
+
+#define _XE_RTP_INCLUDE_PRIVATE_HELPERS
+
+#include "xe_rtp_helpers.h"
+#include "xe_rtp_types.h"
+
+#undef _XE_RTP_INCLUDE_PRIVATE_HELPERS
+
+/*
+ * Register table poke infrastructure
+ */
+
+struct xe_hw_engine;
+struct xe_gt;
+struct xe_reg_sr;
+
+/*
+ * Macros to encode rules to match against platform, IP version, stepping, etc.
+ * Shouldn't be used directly - see XE_RTP_RULES()
+ */
+#define _XE_RTP_RULE_PLATFORM(plat__)						\
+	{ .match_type = XE_RTP_MATCH_PLATFORM, .platform = plat__ }
+
+#define _XE_RTP_RULE_SUBPLATFORM(plat__, sub__)					\
+	{ .match_type = XE_RTP_MATCH_SUBPLATFORM,				\
+	  .platform = plat__, .subplatform = sub__ }
+
+#define _XE_RTP_RULE_GRAPHICS_STEP(start__, end__)				\
+	{ .match_type = XE_RTP_MATCH_GRAPHICS_STEP,				\
+	  .step_start = start__, .step_end = end__ }
+
+#define _XE_RTP_RULE_MEDIA_STEP(start__, end__)					\
+	{ .match_type = XE_RTP_MATCH_MEDIA_STEP,				\
+	  .step_start = start__, .step_end = end__ }
+
+#define _XE_RTP_RULE_ENGINE_CLASS(cls__)					\
+	{ .match_type = XE_RTP_MATCH_ENGINE_CLASS,				\
+	  .engine_class = (cls__) }
+
+/**
+ * XE_RTP_RULE_PLATFORM - Create rule matching platform
+ * @plat_: platform to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_PLATFORM(plat_)						\
+	_XE_RTP_RULE_PLATFORM(XE_##plat_)
+
+/**
+ * XE_RTP_RULE_SUBPLATFORM - Create rule matching platform and sub-platform
+ * @plat_: platform to match
+ * @sub_: sub-platform to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_SUBPLATFORM(plat_, sub_)					\
+	_XE_RTP_RULE_SUBPLATFORM(XE_##plat_, XE_SUBPLATFORM_##plat_##_##sub_)
+
+/**
+ * XE_RTP_RULE_GRAPHICS_STEP - Create rule matching graphics stepping
+ * @start_: First stepping matching the rule
+ * @end_: First stepping that does not match the rule
+ *
+ * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive
+ * on the left, exclusive on the right.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_GRAPHICS_STEP(start_, end_)					\
+	_XE_RTP_RULE_GRAPHICS_STEP(STEP_##start_, STEP_##end_)
+
+/**
+ * XE_RTP_RULE_MEDIA_STEP - Create rule matching media stepping
+ * @start_: First stepping matching the rule
+ * @end_: First stepping that does not match the rule
+ *
+ * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive
+ * on the left, exclusive on the right.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_STEP(start_, end_)					\
+	_XE_RTP_RULE_MEDIA_STEP(STEP_##start_, STEP_##end_)
+
+/**
+ * XE_RTP_RULE_ENGINE_CLASS - Create rule matching an engine class
+ * @cls_: Engine class to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_ENGINE_CLASS(cls_)						\
+	_XE_RTP_RULE_ENGINE_CLASS(XE_ENGINE_CLASS_##cls_)
+
+/**
+ * XE_RTP_RULE_FUNC - Create rule using callback function for match
+ * @func__: Function to call to decide if rule matches
+ *
+ * This allows more complex checks to be performed. The ``XE_RTP``
+ * infrastructure will simply call the function @func_ passed to decide if this
+ * rule matches the device.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_FUNC(func__)						\
+	{ .match_type = XE_RTP_MATCH_FUNC,					\
+	  .match_func = (func__) }
+
+/**
+ * XE_RTP_RULE_GRAPHICS_VERSION - Create rule matching graphics version
+ * @ver__: Graphics IP version to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_GRAPHICS_VERSION(ver__)					\
+	{ .match_type = XE_RTP_MATCH_GRAPHICS_VERSION,				\
+	  .ver_start = ver__, }
+
+/**
+ * XE_RTP_RULE_GRAPHICS_VERSION_RANGE - Create rule matching a range of graphics version
+ * @ver_start__: First graphics IP version to match
+ * @ver_end__: Last graphics IP version to match
+ *
+ * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
+ * inclusive on boths sides
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_GRAPHICS_VERSION_RANGE(ver_start__, ver_end__)		\
+	{ .match_type = XE_RTP_MATCH_GRAPHICS_VERSION_RANGE,			\
+	  .ver_start = ver_start__, .ver_end = ver_end__, }
+
+/**
+ * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version
+ * @ver__: Graphics IP version to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_VERSION(ver__)					\
+	{ .match_type = XE_RTP_MATCH_MEDIA_VERSION,				\
+	  .ver_start = ver__, }
+
+/**
+ * XE_RTP_RULE_MEDIA_VERSION_RANGE - Create rule matching a range of media version
+ * @ver_start__: First media IP version to match
+ * @ver_end__: Last media IP version to match
+ *
+ * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
+ * inclusive on boths sides
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_VERSION_RANGE(ver_start__, ver_end__)			\
+	{ .match_type = XE_RTP_MATCH_MEDIA_VERSION_RANGE,			\
+	  .ver_start = ver_start__, .ver_end = ver_end__, }
+
+/**
+ * XE_RTP_RULE_IS_INTEGRATED - Create a rule matching integrated graphics devices
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_IS_INTEGRATED						\
+	{ .match_type = XE_RTP_MATCH_INTEGRATED }
+
+/**
+ * XE_RTP_RULE_IS_DISCRETE - Create a rule matching discrete graphics devices
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_IS_DISCRETE							\
+	{ .match_type = XE_RTP_MATCH_DISCRETE }
+
+/**
+ * XE_RTP_ACTION_WR - Helper to write a value to the register, overriding all
+ *                    the bits
+ * @reg_: Register
+ * @val_: Value to set
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * The correspondent notation in bspec is:
+ *
+ *	REGNAME = VALUE
+ */
+#define XE_RTP_ACTION_WR(reg_, val_, ...)					\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = ~0u, .set_bits = (val_),					\
+	  .read_mask = (~0u), ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_ACTION_SET - Set bits from @val_ in the register.
+ * @reg_: Register
+ * @val_: Bits to set in the register
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is (example for bits 2
+ * and 5, but could be any):
+ *
+ *	REGNAME[2] = 1
+ *	REGNAME[5] = 1
+ */
+#define XE_RTP_ACTION_SET(reg_, val_, ...)					\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = val_, .set_bits = val_,					\
+	  .read_mask = val_, ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_ACTION_CLR: Clear bits from @val_ in the register.
+ * @reg_: Register
+ * @val_: Bits to clear in the register
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is (example for bits 2
+ * and 5, but could be any):
+ *
+ *	REGNAME[2] = 0
+ *	REGNAME[5] = 0
+ */
+#define XE_RTP_ACTION_CLR(reg_, val_, ...)					\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = val_, .set_bits = 0,					\
+	  .read_mask = val_, ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_ACTION_FIELD_SET: Set a bit range
+ * @reg_: Register
+ * @mask_bits_: Mask of bits to be changed in the register, forming a field
+ * @val_: Value to set in the field denoted by @mask_bits_
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is:
+ *
+ *	REGNAME[<end>:<start>] = VALUE
+ */
+#define XE_RTP_ACTION_FIELD_SET(reg_, mask_bits_, val_, ...)			\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = mask_bits_, .set_bits = val_,				\
+	  .read_mask = mask_bits_, ##__VA_ARGS__ }
+
+#define XE_RTP_ACTION_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...)	\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = (mask_bits_), .set_bits = (val_),				\
+	  .read_mask = 0, ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_ACTION_WHITELIST - Add register to userspace whitelist
+ * @reg_: Register
+ * @val_: Whitelist-specific flags to set
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * Add a register to the whitelist, allowing userspace to modify the ster with
+ * regular user privileges.
+ */
+#define XE_RTP_ACTION_WHITELIST(reg_, val_, ...)				\
+	/* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .set_bits = val_,							\
+	  .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID,				\
+	  ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_NAME - Helper to set the name in xe_rtp_entry
+ * @s_: Name describing this rule, often a HW-specific number
+ *
+ * TODO: maybe move this behind a debug config?
+ */
+#define XE_RTP_NAME(s_)	.name = (s_)
+
+/**
+ * XE_RTP_ENTRY_FLAG - Helper to add multiple flags to a struct xe_rtp_entry_sr
+ * @...: Entry flags, without the ``XE_RTP_ENTRY_FLAG_`` prefix
+ *
+ * Helper to automatically add a ``XE_RTP_ENTRY_FLAG_`` prefix to the flags
+ * when defining struct xe_rtp_entry entries. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  ...
+ *		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_ENTRY_FLAG(...)							\
+	.flags = (XE_RTP_PASTE_FOREACH(ENTRY_FLAG_, BITWISE_OR, (__VA_ARGS__)))
+
+/**
+ * XE_RTP_ACTION_FLAG - Helper to add multiple flags to a struct xe_rtp_action
+ * @...: Action flags, without the ``XE_RTP_ACTION_FLAG_`` prefix
+ *
+ * Helper to automatically add a ``XE_RTP_ACTION_FLAG_`` prefix to the flags
+ * when defining struct xe_rtp_action entries. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  ...
+ *		  XE_RTP_ACTION_SET(..., XE_RTP_ACTION_FLAG(FOREACH_ENGINE)),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_ACTION_FLAG(...)							\
+	.flags = (XE_RTP_PASTE_FOREACH(ACTION_FLAG_, BITWISE_OR, (__VA_ARGS__)))
+
+/**
+ * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry_sr entry
+ * @...: Rules
+ *
+ * At least one rule is needed and up to 4 are supported. Multiple rules are
+ * AND'ed together, i.e. all the rules must evaluate to true for the entry to
+ * be processed. See XE_RTP_MATCH_* for the possible match rules. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_RULES(...)							\
+	.n_rules = _XE_COUNT_ARGS(__VA_ARGS__),					\
+	.rules = (const struct xe_rtp_rule[]) {					\
+		XE_RTP_PASTE_FOREACH(RULE_, COMMA, (__VA_ARGS__))	\
+	}
+
+/**
+ * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry_sr
+ * @...: Actions to be taken
+ *
+ * At least one action is needed and up to 4 are supported. See XE_RTP_ACTION_*
+ * for the possible actions. Example:
+ *
+ * .. code-block:: c
+ *
+ *	const struct xe_rtp_entry_sr wa_entries[] = {
+ *		...
+ *		{ XE_RTP_NAME("test-entry"),
+ *		  XE_RTP_RULES(...),
+ *		  XE_RTP_ACTIONS(SET(..), SET(...), CLR(...)),
+ *		  ...
+ *		},
+ *		...
+ *	};
+ */
+#define XE_RTP_ACTIONS(...)							\
+	.n_actions = _XE_COUNT_ARGS(__VA_ARGS__),				\
+	.actions = (const struct xe_rtp_action[]) {				\
+		XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__))	\
+	}
+
+#define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__),							\
+	struct xe_hw_engine * :	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE },	\
+	struct xe_gt * :	(struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT })
+
+void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
+					       unsigned long *active_entries,
+					       size_t n_entries);
+
+void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
+			  const struct xe_rtp_entry_sr *entries,
+			  struct xe_reg_sr *sr);
+
+void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
+		    const struct xe_rtp_entry *entries);
+
+/* Match functions to be used with XE_RTP_MATCH_FUNC */
+
+/**
+ * xe_rtp_match_even_instance - Match if engine instance is even
+ * @gt: GT structure
+ * @hwe: Engine instance
+ *
+ * Returns: true if engine instance is even, false otherwise
+ */
+bool xe_rtp_match_even_instance(const struct xe_gt *gt,
+				const struct xe_hw_engine *hwe);
+
+/*
+ * xe_rtp_match_first_render_or_compute - Match if it's first render or compute
+ * engine in the GT
+ *
+ * @gt: GT structure
+ * @hwe: Engine instance
+ *
+ * Registers on the render reset domain need to have their values re-applied
+ * when any of those engines are reset. Since the engines reset together, a
+ * programming can be set to just one of them. For simplicity the first engine
+ * of either render or compute class can be chosen.
+ *
+ * Returns: true if engine id is the first to match the render reset domain,
+ * false otherwise.
+ */
+bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
+					  const struct xe_hw_engine *hwe);
+
+/*
+ * xe_rtp_match_first_gslice_fused_off - Match when first gslice is fused off
+ *
+ * @gt: GT structure
+ * @hwe: Engine instance
+ *
+ * Returns: true if first gslice is fused off, false otherwise.
+ */
+bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
+					 const struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h
new file mode 100644
index 000000000000..181b6290fac3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_RTP_HELPERS_
+#define _XE_RTP_HELPERS_
+
+#ifndef _XE_RTP_INCLUDE_PRIVATE_HELPERS
+#error "This header is supposed to be included by xe_rtp.h only"
+#endif
+
+/*
+ * Helper macros - not to be used outside this header.
+ */
+#define _XE_ESC(...) __VA_ARGS__
+#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__, 5, 4, 3, 2, 1,))
+#define __XE_COUNT_ARGS(_, _5, _4, _3, _2, X_, ...) X_
+
+#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,))
+#define __XE_FIRST(x_, ...) x_
+#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__))
+#define __XE_TUPLE_TAIL(x_, ...) (__VA_ARGS__)
+
+#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__
+
+#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b)
+#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b
+
+#define __XE_RTP_PASTE_SEP_COMMA		,
+#define __XE_RTP_PASTE_SEP_BITWISE_OR		|
+
+/*
+ * XE_RTP_PASTE_FOREACH - Paste XE_RTP_<@prefix_> on each element of the tuple
+ * @args, with the end result separated by @sep_. @sep must be one of the
+ * previously declared macros __XE_RTP_PASTE_SEP_*, or declared with such
+ * prefix.
+ *
+ * Examples:
+ *
+ * 1) XE_RTP_PASTE_FOREACH(TEST_, COMMA, (FOO, BAR))
+ *    expands to:
+ *
+ *	XE_RTP_TEST_FOO , XE_RTP_TEST_BAR
+ *
+ * 2) XE_RTP_PASTE_FOREACH(TEST2_, COMMA, (FOO))
+ *    expands to:
+ *
+ *	XE_RTP_TEST2_FOO
+ *
+ * 3) XE_RTP_PASTE_FOREACH(TEST3, BITWISE_OR, (FOO, BAR))
+ *    expands to:
+ *
+ *	XE_RTP_TEST3_FOO | XE_RTP_TEST3_BAR
+ *
+ * 4) #define __XE_RTP_PASTE_SEP_MY_SEP	BANANA
+ *    XE_RTP_PASTE_FOREACH(TEST_, MY_SEP, (FOO, BAR))
+ *    expands to:
+ *
+ *	XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR
+ */
+#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_, _XE_COUNT_ARGS args_)(prefix_, sep_, args_))
+#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_)
+#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_)
+
+/*
+ * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer
+ *
+ * Example:
+ *
+ *	#define foo(a_)	((struct foo){ .a = a_ })
+ *	XE_RTP_DROP_CAST(foo(10))
+ *	expands to:
+ *
+ *	{ .a = 10 }
+ */
+#define XE_RTP_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
new file mode 100644
index 000000000000..637acc7626a4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RTP_TYPES_
+#define _XE_RTP_TYPES_
+
+#include <linux/types.h>
+
+#include "regs/xe_reg_defs.h"
+
+struct xe_hw_engine;
+struct xe_gt;
+
+/**
+ * struct xe_rtp_action - action to take for any matching rule
+ *
+ * This struct records what action should be taken in a register that has a
+ * matching rule. Example of actions: set/clear bits.
+ */
+struct xe_rtp_action {
+	/** @reg: Register */
+	struct xe_reg		reg;
+	/**
+	 * @clr_bits: bits to clear when updating register. It's always a
+	 * superset of bits being modified
+	 */
+	u32			clr_bits;
+	/** @set_bits: bits to set when updating register */
+	u32			set_bits;
+#define XE_RTP_NOCHECK		.read_mask = 0
+	/** @read_mask: mask for bits to consider when reading value back */
+	u32			read_mask;
+#define XE_RTP_ACTION_FLAG_ENGINE_BASE		BIT(0)
+	/** @flags: flags to apply on rule evaluation or action */
+	u8			flags;
+};
+
+enum {
+	XE_RTP_MATCH_PLATFORM,
+	XE_RTP_MATCH_SUBPLATFORM,
+	XE_RTP_MATCH_GRAPHICS_VERSION,
+	XE_RTP_MATCH_GRAPHICS_VERSION_RANGE,
+	XE_RTP_MATCH_GRAPHICS_STEP,
+	XE_RTP_MATCH_MEDIA_VERSION,
+	XE_RTP_MATCH_MEDIA_VERSION_RANGE,
+	XE_RTP_MATCH_MEDIA_STEP,
+	XE_RTP_MATCH_INTEGRATED,
+	XE_RTP_MATCH_DISCRETE,
+	XE_RTP_MATCH_ENGINE_CLASS,
+	XE_RTP_MATCH_NOT_ENGINE_CLASS,
+	XE_RTP_MATCH_FUNC,
+};
+
+/** struct xe_rtp_rule - match rule for processing entry */
+struct xe_rtp_rule {
+	u8 match_type;
+
+	/* match filters */
+	union {
+		/* MATCH_PLATFORM / MATCH_SUBPLATFORM */
+		struct {
+			u8 platform;
+			u8 subplatform;
+		};
+		/*
+		 * MATCH_GRAPHICS_VERSION / XE_RTP_MATCH_GRAPHICS_VERSION_RANGE /
+		 * MATCH_MEDIA_VERSION  / XE_RTP_MATCH_MEDIA_VERSION_RANGE
+		 */
+		struct {
+			u32 ver_start;
+#define XE_RTP_END_VERSION_UNDEFINED	U32_MAX
+			u32 ver_end;
+		};
+		/* MATCH_STEP */
+		struct {
+			u8 step_start;
+			u8 step_end;
+		};
+		/* MATCH_ENGINE_CLASS / MATCH_NOT_ENGINE_CLASS */
+		struct {
+			u8 engine_class;
+		};
+		/* MATCH_FUNC */
+		bool (*match_func)(const struct xe_gt *gt,
+				   const struct xe_hw_engine *hwe);
+	};
+};
+
+/** struct xe_rtp_entry_sr - Entry in an rtp table */
+struct xe_rtp_entry_sr {
+	const char *name;
+	const struct xe_rtp_action *actions;
+	const struct xe_rtp_rule *rules;
+	u8 n_rules;
+	u8 n_actions;
+#define XE_RTP_ENTRY_FLAG_FOREACH_ENGINE	BIT(0)
+	u8 flags;
+};
+
+/** struct xe_rtp_entry - Entry in an rtp table, with no action associated */
+struct xe_rtp_entry {
+	const char *name;
+	const struct xe_rtp_rule *rules;
+	u8 n_rules;
+};
+
+enum xe_rtp_process_type {
+	XE_RTP_PROCESS_TYPE_GT,
+	XE_RTP_PROCESS_TYPE_ENGINE,
+};
+
+struct xe_rtp_process_ctx {
+	union {
+		struct xe_gt *gt;
+		struct xe_hw_engine *hwe;
+	};
+	enum xe_rtp_process_type type;
+	unsigned long *active_entries;
+	size_t n_entries;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
new file mode 100644
index 000000000000..2c4632259edd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_sa.h"
+
+#include <linux/kernel.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_map.h"
+
+static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_sa_manager *sa_manager = arg;
+	struct xe_bo *bo = sa_manager->bo;
+
+	if (!bo) {
+		drm_err(drm, "no bo for sa manager\n");
+		return;
+	}
+
+	drm_suballoc_manager_fini(&sa_manager->base);
+
+	if (bo->vmap.is_iomem)
+		kvfree(sa_manager->cpu_ptr);
+
+	xe_bo_unpin_map_no_vm(bo);
+	sa_manager->bo = NULL;
+}
+
+struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	u32 managed_size = size - SZ_4K;
+	struct xe_bo *bo;
+	int ret;
+
+	struct xe_sa_manager *sa_manager = drmm_kzalloc(&tile_to_xe(tile)->drm,
+							sizeof(*sa_manager),
+							GFP_KERNEL);
+	if (!sa_manager)
+		return ERR_PTR(-ENOMEM);
+
+	sa_manager->bo = NULL;
+
+	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+				  XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo)) {
+		drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
+			PTR_ERR(bo));
+		return (struct xe_sa_manager *)bo;
+	}
+	sa_manager->bo = bo;
+
+	drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
+	sa_manager->gpu_addr = xe_bo_ggtt_addr(bo);
+
+	if (bo->vmap.is_iomem) {
+		sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL);
+		if (!sa_manager->cpu_ptr) {
+			xe_bo_unpin_map_no_vm(sa_manager->bo);
+			sa_manager->bo = NULL;
+			return ERR_PTR(-ENOMEM);
+		}
+	} else {
+		sa_manager->cpu_ptr = bo->vmap.vaddr;
+		memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
+	}
+
+	ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini,
+				       sa_manager);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return sa_manager;
+}
+
+struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
+				  unsigned int size)
+{
+	return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0);
+}
+
+void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo)
+{
+	struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager);
+	struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
+
+	if (!sa_manager->bo->vmap.is_iomem)
+		return;
+
+	xe_map_memcpy_to(xe, &sa_manager->bo->vmap, drm_suballoc_soffset(sa_bo),
+			 xe_sa_bo_cpu_addr(sa_bo),
+			 drm_suballoc_size(sa_bo));
+}
+
+void xe_sa_bo_free(struct drm_suballoc *sa_bo,
+		   struct dma_fence *fence)
+{
+	drm_suballoc_free(sa_bo, fence);
+}
diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
new file mode 100644
index 000000000000..4e96483057d7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_SA_H_
+#define _XE_SA_H_
+
+#include "xe_sa_types.h"
+
+struct dma_fence;
+struct xe_bo;
+struct xe_tile;
+
+struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align);
+
+struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
+				  u32 size);
+void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo);
+void xe_sa_bo_free(struct drm_suballoc *sa_bo,
+		   struct dma_fence *fence);
+
+static inline struct xe_sa_manager *
+to_xe_sa_manager(struct drm_suballoc_manager *mng)
+{
+	return container_of(mng, struct xe_sa_manager, base);
+}
+
+static inline u64 xe_sa_bo_gpu_addr(struct drm_suballoc *sa)
+{
+	return to_xe_sa_manager(sa->manager)->gpu_addr +
+		drm_suballoc_soffset(sa);
+}
+
+static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa)
+{
+	return to_xe_sa_manager(sa->manager)->cpu_ptr +
+		drm_suballoc_soffset(sa);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h
new file mode 100644
index 000000000000..2ef896aeca1d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_SA_TYPES_H_
+#define _XE_SA_TYPES_H_
+
+#include <drm/drm_suballoc.h>
+
+struct xe_bo;
+
+struct xe_sa_manager {
+	struct drm_suballoc_manager base;
+	struct xe_bo *bo;
+	u64 gpu_addr;
+	void *cpu_ptr;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
new file mode 100644
index 000000000000..01106a1156ad
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_sched_job.h"
+
+#include <linux/dma-fence-array.h>
+#include <linux/slab.h>
+
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+static struct kmem_cache *xe_sched_job_slab;
+static struct kmem_cache *xe_sched_job_parallel_slab;
+
+int __init xe_sched_job_module_init(void)
+{
+	xe_sched_job_slab =
+		kmem_cache_create("xe_sched_job",
+				  sizeof(struct xe_sched_job) +
+				  sizeof(u64), 0,
+				  SLAB_HWCACHE_ALIGN, NULL);
+	if (!xe_sched_job_slab)
+		return -ENOMEM;
+
+	xe_sched_job_parallel_slab =
+		kmem_cache_create("xe_sched_job_parallel",
+				  sizeof(struct xe_sched_job) +
+				  sizeof(u64) *
+				  XE_HW_ENGINE_MAX_INSTANCE, 0,
+				  SLAB_HWCACHE_ALIGN, NULL);
+	if (!xe_sched_job_parallel_slab) {
+		kmem_cache_destroy(xe_sched_job_slab);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void xe_sched_job_module_exit(void)
+{
+	kmem_cache_destroy(xe_sched_job_slab);
+	kmem_cache_destroy(xe_sched_job_parallel_slab);
+}
+
+static struct xe_sched_job *job_alloc(bool parallel)
+{
+	return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab :
+				 xe_sched_job_slab, GFP_KERNEL);
+}
+
+bool xe_sched_job_is_migration(struct xe_exec_queue *q)
+{
+	return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION);
+}
+
+static void job_free(struct xe_sched_job *job)
+{
+	struct xe_exec_queue *q = job->q;
+	bool is_migration = xe_sched_job_is_migration(q);
+
+	kmem_cache_free(xe_exec_queue_is_parallel(job->q) || is_migration ?
+			xe_sched_job_parallel_slab : xe_sched_job_slab, job);
+}
+
+static struct xe_device *job_to_xe(struct xe_sched_job *job)
+{
+	return gt_to_xe(job->q->gt);
+}
+
+struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
+					 u64 *batch_addr)
+{
+	struct xe_sched_job *job;
+	struct dma_fence **fences;
+	bool is_migration = xe_sched_job_is_migration(q);
+	int err;
+	int i, j;
+	u32 width;
+
+	/* only a kernel context can submit a vm-less job */
+	XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
+
+	/* Migration and kernel engines have their own locking */
+	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
+		lockdep_assert_held(&q->vm->lock);
+		if (!xe_vm_in_lr_mode(q->vm))
+			xe_vm_assert_held(q->vm);
+	}
+
+	job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration);
+	if (!job)
+		return ERR_PTR(-ENOMEM);
+
+	job->q = q;
+	kref_init(&job->refcount);
+	xe_exec_queue_get(job->q);
+
+	err = drm_sched_job_init(&job->drm, q->entity, 1, NULL);
+	if (err)
+		goto err_free;
+
+	if (!xe_exec_queue_is_parallel(q)) {
+		job->fence = xe_lrc_create_seqno_fence(q->lrc);
+		if (IS_ERR(job->fence)) {
+			err = PTR_ERR(job->fence);
+			goto err_sched_job;
+		}
+	} else {
+		struct dma_fence_array *cf;
+
+		fences = kmalloc_array(q->width, sizeof(*fences), GFP_KERNEL);
+		if (!fences) {
+			err = -ENOMEM;
+			goto err_sched_job;
+		}
+
+		for (j = 0; j < q->width; ++j) {
+			fences[j] = xe_lrc_create_seqno_fence(q->lrc + j);
+			if (IS_ERR(fences[j])) {
+				err = PTR_ERR(fences[j]);
+				goto err_fences;
+			}
+		}
+
+		cf = dma_fence_array_create(q->width, fences,
+					    q->parallel.composite_fence_ctx,
+					    q->parallel.composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			--q->parallel.composite_fence_seqno;
+			err = -ENOMEM;
+			goto err_fences;
+		}
+
+		/* Sanity check */
+		for (j = 0; j < q->width; ++j)
+			xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno);
+
+		job->fence = &cf->base;
+	}
+
+	width = q->width;
+	if (is_migration)
+		width = 2;
+
+	for (i = 0; i < width; ++i)
+		job->batch_addr[i] = batch_addr[i];
+
+	/* All other jobs require a VM to be open which has a ref */
+	if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL))
+		xe_device_mem_access_get(job_to_xe(job));
+	xe_device_assert_mem_access(job_to_xe(job));
+
+	trace_xe_sched_job_create(job);
+	return job;
+
+err_fences:
+	for (j = j - 1; j >= 0; --j) {
+		--q->lrc[j].fence_ctx.next_seqno;
+		dma_fence_put(fences[j]);
+	}
+	kfree(fences);
+err_sched_job:
+	drm_sched_job_cleanup(&job->drm);
+err_free:
+	xe_exec_queue_put(q);
+	job_free(job);
+	return ERR_PTR(err);
+}
+
+/**
+ * xe_sched_job_destroy - Destroy XE schedule job
+ * @ref: reference to XE schedule job
+ *
+ * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup
+ * base DRM schedule job, and free memory for XE schedule job.
+ */
+void xe_sched_job_destroy(struct kref *ref)
+{
+	struct xe_sched_job *job =
+		container_of(ref, struct xe_sched_job, refcount);
+
+	if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL))
+		xe_device_mem_access_put(job_to_xe(job));
+	xe_exec_queue_put(job->q);
+	dma_fence_put(job->fence);
+	drm_sched_job_cleanup(&job->drm);
+	job_free(job);
+}
+
+void xe_sched_job_set_error(struct xe_sched_job *job, int error)
+{
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags))
+		return;
+
+	dma_fence_set_error(job->fence, error);
+
+	if (dma_fence_is_array(job->fence)) {
+		struct dma_fence_array *array =
+			to_dma_fence_array(job->fence);
+		struct dma_fence **child = array->fences;
+		unsigned int nchild = array->num_fences;
+
+		do {
+			struct dma_fence *current_fence = *child++;
+
+			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				     &current_fence->flags))
+				continue;
+			dma_fence_set_error(current_fence, error);
+		} while (--nchild);
+	}
+
+	trace_xe_sched_job_set_error(job);
+
+	dma_fence_enable_sw_signaling(job->fence);
+	xe_hw_fence_irq_run(job->q->fence_irq);
+}
+
+bool xe_sched_job_started(struct xe_sched_job *job)
+{
+	struct xe_lrc *lrc = job->q->lrc;
+
+	return !__dma_fence_is_later(xe_sched_job_seqno(job),
+				     xe_lrc_start_seqno(lrc),
+				     job->fence->ops);
+}
+
+bool xe_sched_job_completed(struct xe_sched_job *job)
+{
+	struct xe_lrc *lrc = job->q->lrc;
+
+	/*
+	 * Can safely check just LRC[0] seqno as that is last seqno written when
+	 * parallel handshake is done.
+	 */
+
+	return !__dma_fence_is_later(xe_sched_job_seqno(job), xe_lrc_seqno(lrc),
+				     job->fence->ops);
+}
+
+void xe_sched_job_arm(struct xe_sched_job *job)
+{
+	drm_sched_job_arm(&job->drm);
+}
+
+void xe_sched_job_push(struct xe_sched_job *job)
+{
+	xe_sched_job_get(job);
+	trace_xe_sched_job_exec(job);
+	drm_sched_entity_push_job(&job->drm);
+	xe_sched_job_put(job);
+}
+
+/**
+ * xe_sched_job_last_fence_add_dep - Add last fence dependency to job
+ * @job:job to add the last fence dependency to
+ * @vm: virtual memory job belongs to
+ *
+ * Returns:
+ * 0 on success, or an error on failing to expand the array.
+ */
+int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
+{
+	struct dma_fence *fence;
+
+	fence = xe_exec_queue_last_fence_get(job->q, vm);
+	dma_fence_get(fence);
+
+	return drm_sched_job_add_dependency(&job->drm, fence);
+}
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
new file mode 100644
index 000000000000..34f475ba7f50
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_SCHED_JOB_H_
+#define _XE_SCHED_JOB_H_
+
+#include "xe_sched_job_types.h"
+
+struct xe_vm;
+
+#define XE_SCHED_HANG_LIMIT 1
+#define XE_SCHED_JOB_TIMEOUT LONG_MAX
+
+int xe_sched_job_module_init(void);
+void xe_sched_job_module_exit(void);
+
+struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
+					 u64 *batch_addr);
+void xe_sched_job_destroy(struct kref *ref);
+
+/**
+ * xe_sched_job_get - get reference to XE schedule job
+ * @job: XE schedule job object
+ *
+ * Increment XE schedule job's reference count
+ */
+static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job)
+{
+	kref_get(&job->refcount);
+	return job;
+}
+
+/**
+ * xe_sched_job_put - put reference to XE schedule job
+ * @job: XE schedule job object
+ *
+ * Decrement XE schedule job's reference count, call xe_sched_job_destroy when
+ * reference count == 0.
+ */
+static inline void xe_sched_job_put(struct xe_sched_job *job)
+{
+	kref_put(&job->refcount, xe_sched_job_destroy);
+}
+
+void xe_sched_job_set_error(struct xe_sched_job *job, int error);
+static inline bool xe_sched_job_is_error(struct xe_sched_job *job)
+{
+	return job->fence->error < 0;
+}
+
+bool xe_sched_job_started(struct xe_sched_job *job);
+bool xe_sched_job_completed(struct xe_sched_job *job);
+
+void xe_sched_job_arm(struct xe_sched_job *job);
+void xe_sched_job_push(struct xe_sched_job *job);
+
+int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm);
+
+static inline struct xe_sched_job *
+to_xe_sched_job(struct drm_sched_job *drm)
+{
+	return container_of(drm, struct xe_sched_job, drm);
+}
+
+static inline u32 xe_sched_job_seqno(struct xe_sched_job *job)
+{
+	return job->fence->seqno;
+}
+
+static inline void
+xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags)
+{
+	job->migrate_flush_flags = flags;
+}
+
+bool xe_sched_job_is_migration(struct xe_exec_queue *q);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
new file mode 100644
index 000000000000..71213ba9735b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_SCHED_JOB_TYPES_H_
+#define _XE_SCHED_JOB_TYPES_H_
+
+#include <linux/kref.h>
+
+#include <drm/gpu_scheduler.h>
+
+struct xe_exec_queue;
+
+/**
+ * struct xe_sched_job - XE schedule job (batch buffer tracking)
+ */
+struct xe_sched_job {
+	/** @drm: base DRM scheduler job */
+	struct drm_sched_job drm;
+	/** @q: Exec queue */
+	struct xe_exec_queue *q;
+	/** @refcount: ref count of this job */
+	struct kref refcount;
+	/**
+	 * @fence: dma fence to indicate completion. 1 way relationship - job
+	 * can safely reference fence, fence cannot safely reference job.
+	 */
+#define JOB_FLAG_SUBMIT		DMA_FENCE_FLAG_USER_BITS
+	struct dma_fence *fence;
+	/** @user_fence: write back value when BB is complete */
+	struct {
+		/** @used: user fence is used */
+		bool used;
+		/** @addr: address to write to */
+		u64 addr;
+		/** @value: write back value */
+		u64 value;
+	} user_fence;
+	/** @migrate_flush_flags: Additional flush flags for migration jobs */
+	u32 migrate_flush_flags;
+	/** @batch_addr: batch buffer address of job */
+	u64 batch_addr[];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
new file mode 100644
index 000000000000..42a0e0c917a0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_assert.h"
+#include "xe_sriov.h"
+
+/**
+ * xe_sriov_mode_to_string - Convert enum value to string.
+ * @mode: the &xe_sriov_mode to convert
+ *
+ * Returns: SR-IOV mode as a user friendly string.
+ */
+const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode)
+{
+	switch (mode) {
+	case XE_SRIOV_MODE_NONE:
+		return "none";
+	case XE_SRIOV_MODE_PF:
+		return "SR-IOV PF";
+	case XE_SRIOV_MODE_VF:
+		return "SR-IOV VF";
+	default:
+		return "<invalid>";
+	}
+}
+
+/**
+ * xe_sriov_probe_early - Probe a SR-IOV mode.
+ * @xe: the &xe_device to probe mode on
+ * @has_sriov: flag indicating hardware support for SR-IOV
+ *
+ * This function should be called only once and as soon as possible during
+ * driver probe to detect whether we are running a SR-IOV Physical Function
+ * (PF) or a Virtual Function (VF) device.
+ *
+ * SR-IOV PF mode detection is based on PCI @dev_is_pf() function.
+ * SR-IOV VF mode detection is based on dedicated MMIO register read.
+ */
+void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov)
+{
+	enum xe_sriov_mode mode = XE_SRIOV_MODE_NONE;
+
+	/* TODO: replace with proper mode detection */
+	xe_assert(xe, !has_sriov);
+
+	xe_assert(xe, !xe->sriov.__mode);
+	xe->sriov.__mode = mode;
+	xe_assert(xe, xe->sriov.__mode);
+
+	if (has_sriov)
+		drm_info(&xe->drm, "Running in %s mode\n",
+			 xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h
new file mode 100644
index 000000000000..5af73a3172b0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_H_
+#define _XE_SRIOV_H_
+
+#include "xe_assert.h"
+#include "xe_device_types.h"
+#include "xe_sriov_types.h"
+
+const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode);
+
+void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov);
+
+static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe)
+{
+	xe_assert(xe, xe->sriov.__mode);
+	return xe->sriov.__mode;
+}
+
+static inline bool xe_device_is_sriov_pf(struct xe_device *xe)
+{
+	return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_PF;
+}
+
+static inline bool xe_device_is_sriov_vf(struct xe_device *xe)
+{
+	return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_VF;
+}
+
+#ifdef CONFIG_PCI_IOV
+#define IS_SRIOV_PF(xe) xe_device_is_sriov_pf(xe)
+#else
+#define IS_SRIOV_PF(xe) (typecheck(struct xe_device *, (xe)) && false)
+#endif
+#define IS_SRIOV_VF(xe) xe_device_is_sriov_vf(xe)
+
+#define IS_SRIOV(xe) (IS_SRIOV_PF(xe) || IS_SRIOV_VF(xe))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_printk.h b/drivers/gpu/drm/xe/xe_sriov_printk.h
new file mode 100644
index 000000000000..117e1d541692
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_printk.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PRINTK_H_
+#define _XE_SRIOV_PRINTK_H_
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+#include "xe_sriov_types.h"
+
+#define xe_sriov_printk_prefix(xe) \
+	((xe)->sriov.__mode == XE_SRIOV_MODE_PF ? "PF: " : \
+	 (xe)->sriov.__mode == XE_SRIOV_MODE_VF ? "VF: " : "")
+
+#define xe_sriov_printk(xe, _level, fmt, ...) \
+	drm_##_level(&(xe)->drm, "%s" fmt, xe_sriov_printk_prefix(xe), ##__VA_ARGS__)
+
+#define xe_sriov_err(xe, fmt, ...) \
+	xe_sriov_printk((xe), err, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_err_ratelimited(xe, fmt, ...) \
+	xe_sriov_printk((xe), err_ratelimited, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_warn(xe, fmt, ...) \
+	xe_sriov_printk((xe), warn, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_notice(xe, fmt, ...) \
+	xe_sriov_printk((xe), notice, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_info(xe, fmt, ...) \
+	xe_sriov_printk((xe), info, fmt, ##__VA_ARGS__)
+
+#define xe_sriov_dbg(xe, fmt, ...) \
+	xe_sriov_printk((xe), dbg, fmt, ##__VA_ARGS__)
+
+/* for low level noisy debug messages */
+#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
+#define xe_sriov_dbg_verbose(xe, fmt, ...) xe_sriov_dbg(xe, fmt, ##__VA_ARGS__)
+#else
+#define xe_sriov_dbg_verbose(xe, fmt, ...) typecheck(struct xe_device *, (xe))
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h
new file mode 100644
index 000000000000..999a4311b98b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_TYPES_H_
+#define _XE_SRIOV_TYPES_H_
+
+#include <linux/build_bug.h>
+
+/**
+ * enum xe_sriov_mode - SR-IOV mode
+ * @XE_SRIOV_MODE_NONE: bare-metal mode (non-virtualized)
+ * @XE_SRIOV_MODE_PF: SR-IOV Physical Function (PF) mode
+ * @XE_SRIOV_MODE_VF: SR-IOV Virtual Function (VF) mode
+ */
+enum xe_sriov_mode {
+	/*
+	 * Note: We don't use default enum value 0 to allow catch any too early
+	 * attempt of checking the SR-IOV mode prior to the actual mode probe.
+	 */
+	XE_SRIOV_MODE_NONE = 1,
+	XE_SRIOV_MODE_PF,
+	XE_SRIOV_MODE_VF,
+};
+static_assert(XE_SRIOV_MODE_NONE);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
new file mode 100644
index 000000000000..eaf1b718f26c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_step.h"
+
+#include <linux/bitfield.h>
+
+#include "xe_device.h"
+#include "xe_platform_types.h"
+
+/*
+ * Provide mapping between PCI's revision ID to the individual GMD
+ * (Graphics/Media/Display) stepping values that can be compared numerically.
+ *
+ * Some platforms may have unusual ways of mapping PCI revision ID to GMD
+ * steppings.  E.g., in some cases a higher PCI revision may translate to a
+ * lower stepping of the GT and/or display IP.
+ *
+ * Also note that some revisions/steppings may have been set aside as
+ * placeholders but never materialized in real hardware; in those cases there
+ * may be jumps in the revision IDs or stepping values in the tables below.
+ */
+
+/*
+ * Some platforms always have the same stepping value for GT and display;
+ * use a macro to define these to make it easier to identify the platforms
+ * where the two steppings can deviate.
+ */
+#define COMMON_GT_MEDIA_STEP(x_)	\
+	.graphics = STEP_##x_,		\
+	.media = STEP_##x_
+
+#define COMMON_STEP(x_)			\
+	COMMON_GT_MEDIA_STEP(x_),	\
+	.graphics = STEP_##x_,		\
+	.media = STEP_##x_,		\
+	.display = STEP_##x_
+
+__diag_push();
+__diag_ignore_all("-Woverride-init", "Allow field overrides in table");
+
+/* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */
+static const struct xe_step_info tgl_revids[] = {
+	[0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 },
+	[1] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_D0 },
+};
+
+static const struct xe_step_info dg1_revids[] = {
+	[0] = { COMMON_STEP(A0) },
+	[1] = { COMMON_STEP(B0) },
+};
+
+static const struct xe_step_info adls_revids[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
+	[0x1] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A2 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
+	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_B0 },
+	[0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 },
+};
+
+static const struct xe_step_info adls_rpls_revids[] = {
+	[0x4] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_D0 },
+	[0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 },
+};
+
+static const struct xe_step_info adlp_revids[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
+	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 },
+	[0xC] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_D0 },
+};
+
+static const struct xe_step_info adlp_rpl_revids[] = {
+	[0x4] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_E0 },
+};
+
+static const struct xe_step_info adln_revids[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_D0 },
+};
+
+static const struct xe_step_info dg2_g10_revid_step_tbl[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
+	[0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
+	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 },
+};
+
+static const struct xe_step_info dg2_g11_revid_step_tbl[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 },
+	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_C0 },
+	[0x5] = { COMMON_GT_MEDIA_STEP(B1), .display = STEP_C0 },
+};
+
+static const struct xe_step_info dg2_g12_revid_step_tbl[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_C0 },
+	[0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_C0 },
+};
+
+static const struct xe_step_info pvc_revid_step_tbl[] = {
+	[0x5] = { .graphics = STEP_B0 },
+	[0x6] = { .graphics = STEP_B1 },
+	[0x7] = { .graphics = STEP_C0 },
+};
+
+static const int pvc_basedie_subids[] = {
+	[0x3] = STEP_B0,
+	[0x4] = STEP_B1,
+	[0x5] = STEP_B3,
+};
+
+__diag_pop();
+
+/**
+ * xe_step_pre_gmdid_get - Determine IP steppings from PCI revid
+ * @xe: Xe device
+ *
+ * Convert the PCI revid into proper IP steppings.  This should only be
+ * used on platforms that do not have GMD_ID support.
+ */
+struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe)
+{
+	const struct xe_step_info *revids = NULL;
+	struct xe_step_info step = {};
+	u16 revid = xe->info.revid;
+	int size = 0;
+	const int *basedie_info = NULL;
+	int basedie_size = 0;
+	int baseid = 0;
+
+	if (xe->info.platform == XE_PVC) {
+		baseid = FIELD_GET(GENMASK(5, 3), xe->info.revid);
+		revid = FIELD_GET(GENMASK(2, 0), xe->info.revid);
+		revids = pvc_revid_step_tbl;
+		size = ARRAY_SIZE(pvc_revid_step_tbl);
+		basedie_info = pvc_basedie_subids;
+		basedie_size = ARRAY_SIZE(pvc_basedie_subids);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10) {
+		revids = dg2_g10_revid_step_tbl;
+		size = ARRAY_SIZE(dg2_g10_revid_step_tbl);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G11) {
+		revids = dg2_g11_revid_step_tbl;
+		size = ARRAY_SIZE(dg2_g11_revid_step_tbl);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G12) {
+		revids = dg2_g12_revid_step_tbl;
+		size = ARRAY_SIZE(dg2_g12_revid_step_tbl);
+	} else if (xe->info.platform == XE_ALDERLAKE_N) {
+		revids = adln_revids;
+		size = ARRAY_SIZE(adln_revids);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_S_RPLS) {
+		revids = adls_rpls_revids;
+		size = ARRAY_SIZE(adls_rpls_revids);
+	} else if (xe->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU) {
+		revids = adlp_rpl_revids;
+		size = ARRAY_SIZE(adlp_rpl_revids);
+	} else if (xe->info.platform == XE_ALDERLAKE_P) {
+		revids = adlp_revids;
+		size = ARRAY_SIZE(adlp_revids);
+	} else if (xe->info.platform == XE_ALDERLAKE_S) {
+		revids = adls_revids;
+		size = ARRAY_SIZE(adls_revids);
+	} else if (xe->info.platform == XE_DG1) {
+		revids = dg1_revids;
+		size = ARRAY_SIZE(dg1_revids);
+	} else if (xe->info.platform == XE_TIGERLAKE) {
+		revids = tgl_revids;
+		size = ARRAY_SIZE(tgl_revids);
+	}
+
+	/* Not using the stepping scheme for the platform yet. */
+	if (!revids)
+		return step;
+
+	if (revid < size && revids[revid].graphics != STEP_NONE) {
+		step = revids[revid];
+	} else {
+		drm_warn(&xe->drm, "Unknown revid 0x%02x\n", revid);
+
+		/*
+		 * If we hit a gap in the revid array, use the information for
+		 * the next revid.
+		 *
+		 * This may be wrong in all sorts of ways, especially if the
+		 * steppings in the array are not monotonically increasing, but
+		 * it's better than defaulting to 0.
+		 */
+		while (revid < size && revids[revid].graphics == STEP_NONE)
+			revid++;
+
+		if (revid < size) {
+			drm_dbg(&xe->drm, "Using steppings for revid 0x%02x\n",
+				revid);
+			step = revids[revid];
+		} else {
+			drm_dbg(&xe->drm, "Using future steppings\n");
+			step.graphics = STEP_FUTURE;
+			step.display = STEP_FUTURE;
+		}
+	}
+
+	drm_WARN_ON(&xe->drm, step.graphics == STEP_NONE);
+
+	if (basedie_info && basedie_size) {
+		if (baseid < basedie_size && basedie_info[baseid] != STEP_NONE) {
+			step.basedie = basedie_info[baseid];
+		} else {
+			drm_warn(&xe->drm, "Unknown baseid 0x%02x\n", baseid);
+			step.basedie = STEP_FUTURE;
+		}
+	}
+
+	return step;
+}
+
+/**
+ * xe_step_gmdid_get - Determine IP steppings from GMD_ID revid fields
+ * @xe: Xe device
+ * @graphics_gmdid_revid: value of graphics GMD_ID register's revid field
+ * @media_gmdid_revid: value of media GMD_ID register's revid field
+ *
+ * Convert the revid fields of the GMD_ID registers into proper IP steppings.
+ *
+ * GMD_ID revid values are currently expected to have consistent meanings on
+ * all platforms:  major steppings (A0, B0, etc.) are 4 apart, with minor
+ * steppings (A1, A2, etc.) taking the values in between.
+ */
+struct xe_step_info xe_step_gmdid_get(struct xe_device *xe,
+				      u32 graphics_gmdid_revid,
+				      u32 media_gmdid_revid)
+{
+	struct xe_step_info step = {
+		.graphics = STEP_A0 + graphics_gmdid_revid,
+		.media = STEP_A0 + media_gmdid_revid,
+	};
+
+	if (step.graphics >= STEP_FUTURE) {
+		step.graphics = STEP_FUTURE;
+		drm_dbg(&xe->drm, "Graphics GMD_ID revid value %d treated as future stepping\n",
+			graphics_gmdid_revid);
+	}
+
+	if (step.media >= STEP_FUTURE) {
+		step.media = STEP_FUTURE;
+		drm_dbg(&xe->drm, "Media GMD_ID revid value %d treated as future stepping\n",
+			media_gmdid_revid);
+	}
+
+	return step;
+}
+
+#define STEP_NAME_CASE(name)	\
+	case STEP_##name:	\
+		return #name;
+
+const char *xe_step_name(enum xe_step step)
+{
+	switch (step) {
+	STEP_NAME_LIST(STEP_NAME_CASE);
+
+	default:
+		return "**";
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h
new file mode 100644
index 000000000000..686cb59200c2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_STEP_H_
+#define _XE_STEP_H_
+
+#include <linux/types.h>
+
+#include "xe_step_types.h"
+
+struct xe_device;
+
+struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe);
+struct xe_step_info xe_step_gmdid_get(struct xe_device *xe,
+				      u32 graphics_gmdid_revid,
+				      u32 media_gmdid_revid);
+static inline u32 xe_step_to_gmdid(enum xe_step step) { return step - STEP_A0; }
+
+const char *xe_step_name(enum xe_step step);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h
new file mode 100644
index 000000000000..ccc9b4795e95
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step_types.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_STEP_TYPES_H_
+#define _XE_STEP_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_step_info {
+	u8 graphics;
+	u8 media;
+	u8 display;
+	u8 basedie;
+};
+
+#define STEP_ENUM_VAL(name)  STEP_##name,
+
+#define STEP_NAME_LIST(func)		\
+	func(A0)			\
+	func(A1)			\
+	func(A2)			\
+	func(A3)			\
+	func(B0)			\
+	func(B1)			\
+	func(B2)			\
+	func(B3)			\
+	func(C0)			\
+	func(C1)			\
+	func(C2)			\
+	func(C3)			\
+	func(D0)			\
+	func(D1)			\
+	func(D2)			\
+	func(D3)			\
+	func(E0)
+
+/*
+ * Symbolic steppings that do not match the hardware. These are valid both as gt
+ * and display steppings as symbolic names.
+ */
+enum xe_step {
+	STEP_NONE = 0,
+	STEP_NAME_LIST(STEP_ENUM_VAL)
+	STEP_FUTURE,
+	STEP_FOREVER,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
new file mode 100644
index 000000000000..e4c220cf9115
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_sync.h"
+
+#include <linux/dma-fence-array.h>
+#include <linux/kthread.h>
+#include <linux/sched/mm.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_print.h>
+#include <drm/drm_syncobj.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device_types.h"
+#include "xe_exec_queue.h"
+#include "xe_macros.h"
+#include "xe_sched_job_types.h"
+
+struct user_fence {
+	struct xe_device *xe;
+	struct kref refcount;
+	struct dma_fence_cb cb;
+	struct work_struct worker;
+	struct mm_struct *mm;
+	u64 __user *addr;
+	u64 value;
+};
+
+static void user_fence_destroy(struct kref *kref)
+{
+	struct user_fence *ufence = container_of(kref, struct user_fence,
+						 refcount);
+
+	mmdrop(ufence->mm);
+	kfree(ufence);
+}
+
+static void user_fence_get(struct user_fence *ufence)
+{
+	kref_get(&ufence->refcount);
+}
+
+static void user_fence_put(struct user_fence *ufence)
+{
+	kref_put(&ufence->refcount, user_fence_destroy);
+}
+
+static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr,
+					    u64 value)
+{
+	struct user_fence *ufence;
+
+	ufence = kmalloc(sizeof(*ufence), GFP_KERNEL);
+	if (!ufence)
+		return NULL;
+
+	ufence->xe = xe;
+	kref_init(&ufence->refcount);
+	ufence->addr = u64_to_user_ptr(addr);
+	ufence->value = value;
+	ufence->mm = current->mm;
+	mmgrab(ufence->mm);
+
+	return ufence;
+}
+
+static void user_fence_worker(struct work_struct *w)
+{
+	struct user_fence *ufence = container_of(w, struct user_fence, worker);
+
+	if (mmget_not_zero(ufence->mm)) {
+		kthread_use_mm(ufence->mm);
+		if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value)))
+			XE_WARN_ON("Copy to user failed");
+		kthread_unuse_mm(ufence->mm);
+		mmput(ufence->mm);
+	}
+
+	wake_up_all(&ufence->xe->ufence_wq);
+	user_fence_put(ufence);
+}
+
+static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence)
+{
+	INIT_WORK(&ufence->worker, user_fence_worker);
+	queue_work(ufence->xe->ordered_wq, &ufence->worker);
+	dma_fence_put(fence);
+}
+
+static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct user_fence *ufence = container_of(cb, struct user_fence, cb);
+
+	kick_ufence(ufence, fence);
+}
+
+int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
+			struct xe_sync_entry *sync,
+			struct drm_xe_sync __user *sync_user,
+			unsigned int flags)
+{
+	struct drm_xe_sync sync_in;
+	int err;
+	bool exec = flags & SYNC_PARSE_FLAG_EXEC;
+	bool in_lr_mode = flags & SYNC_PARSE_FLAG_LR_MODE;
+	bool disallow_user_fence = flags & SYNC_PARSE_FLAG_DISALLOW_USER_FENCE;
+	bool signal;
+
+	if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user)))
+		return -EFAULT;
+
+	if (XE_IOCTL_DBG(xe, sync_in.flags & ~DRM_XE_SYNC_FLAG_SIGNAL) ||
+	    XE_IOCTL_DBG(xe, sync_in.reserved[0] || sync_in.reserved[1]))
+		return -EINVAL;
+
+	signal = sync_in.flags & DRM_XE_SYNC_FLAG_SIGNAL;
+	switch (sync_in.type) {
+	case DRM_XE_SYNC_TYPE_SYNCOBJ:
+		if (XE_IOCTL_DBG(xe, in_lr_mode && signal))
+			return -EOPNOTSUPP;
+
+		if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr)))
+			return -EINVAL;
+
+		sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
+		if (XE_IOCTL_DBG(xe, !sync->syncobj))
+			return -ENOENT;
+
+		if (!signal) {
+			sync->fence = drm_syncobj_fence_get(sync->syncobj);
+			if (XE_IOCTL_DBG(xe, !sync->fence))
+				return -EINVAL;
+		}
+		break;
+
+	case DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ:
+		if (XE_IOCTL_DBG(xe, in_lr_mode && signal))
+			return -EOPNOTSUPP;
+
+		if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr)))
+			return -EINVAL;
+
+		if (XE_IOCTL_DBG(xe, sync_in.timeline_value == 0))
+			return -EINVAL;
+
+		sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
+		if (XE_IOCTL_DBG(xe, !sync->syncobj))
+			return -ENOENT;
+
+		if (signal) {
+			sync->chain_fence = dma_fence_chain_alloc();
+			if (!sync->chain_fence)
+				return -ENOMEM;
+		} else {
+			sync->fence = drm_syncobj_fence_get(sync->syncobj);
+			if (XE_IOCTL_DBG(xe, !sync->fence))
+				return -EINVAL;
+
+			err = dma_fence_chain_find_seqno(&sync->fence,
+							 sync_in.timeline_value);
+			if (err)
+				return err;
+		}
+		break;
+
+	case DRM_XE_SYNC_TYPE_USER_FENCE:
+		if (XE_IOCTL_DBG(xe, disallow_user_fence))
+			return -EOPNOTSUPP;
+
+		if (XE_IOCTL_DBG(xe, !signal))
+			return -EOPNOTSUPP;
+
+		if (XE_IOCTL_DBG(xe, sync_in.addr & 0x7))
+			return -EINVAL;
+
+		if (exec) {
+			sync->addr = sync_in.addr;
+		} else {
+			sync->ufence = user_fence_create(xe, sync_in.addr,
+							 sync_in.timeline_value);
+			if (XE_IOCTL_DBG(xe, !sync->ufence))
+				return -ENOMEM;
+		}
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	sync->type = sync_in.type;
+	sync->flags = sync_in.flags;
+	sync->timeline_value = sync_in.timeline_value;
+
+	return 0;
+}
+
+int xe_sync_entry_wait(struct xe_sync_entry *sync)
+{
+	if (sync->fence)
+		dma_fence_wait(sync->fence, true);
+
+	return 0;
+}
+
+int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
+{
+	int err;
+
+	if (sync->fence) {
+		err = drm_sched_job_add_dependency(&job->drm,
+						   dma_fence_get(sync->fence));
+		if (err) {
+			dma_fence_put(sync->fence);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
+			  struct dma_fence *fence)
+{
+	if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL))
+		return;
+
+	if (sync->chain_fence) {
+		drm_syncobj_add_point(sync->syncobj, sync->chain_fence,
+				      fence, sync->timeline_value);
+		/*
+		 * The chain's ownership is transferred to the
+		 * timeline.
+		 */
+		sync->chain_fence = NULL;
+	} else if (sync->syncobj) {
+		drm_syncobj_replace_fence(sync->syncobj, fence);
+	} else if (sync->ufence) {
+		int err;
+
+		dma_fence_get(fence);
+		user_fence_get(sync->ufence);
+		err = dma_fence_add_callback(fence, &sync->ufence->cb,
+					     user_fence_cb);
+		if (err == -ENOENT) {
+			kick_ufence(sync->ufence, fence);
+		} else if (err) {
+			XE_WARN_ON("failed to add user fence");
+			user_fence_put(sync->ufence);
+			dma_fence_put(fence);
+		}
+	} else if (sync->type == DRM_XE_SYNC_TYPE_USER_FENCE) {
+		job->user_fence.used = true;
+		job->user_fence.addr = sync->addr;
+		job->user_fence.value = sync->timeline_value;
+	}
+}
+
+void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
+{
+	if (sync->syncobj)
+		drm_syncobj_put(sync->syncobj);
+	if (sync->fence)
+		dma_fence_put(sync->fence);
+	if (sync->chain_fence)
+		dma_fence_put(&sync->chain_fence->base);
+	if (sync->ufence)
+		user_fence_put(sync->ufence);
+}
+
+/**
+ * xe_sync_in_fence_get() - Get a fence from syncs, exec queue, and VM
+ * @sync: input syncs
+ * @num_sync: number of syncs
+ * @q: exec queue
+ * @vm: VM
+ *
+ * Get a fence from syncs, exec queue, and VM. If syncs contain in-fences create
+ * and return a composite fence of all in-fences + last fence. If no in-fences
+ * return last fence on  input exec queue. Caller must drop reference to
+ * returned fence.
+ *
+ * Return: fence on success, ERR_PTR(-ENOMEM) on failure
+ */
+struct dma_fence *
+xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
+		     struct xe_exec_queue *q, struct xe_vm *vm)
+{
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	struct dma_fence *fence;
+	int i, num_in_fence = 0, current_fence = 0;
+
+	lockdep_assert_held(&vm->lock);
+
+	/* Count in-fences */
+	for (i = 0; i < num_sync; ++i) {
+		if (sync[i].fence) {
+			++num_in_fence;
+			fence = sync[i].fence;
+		}
+	}
+
+	/* Easy case... */
+	if (!num_in_fence) {
+		fence = xe_exec_queue_last_fence_get(q, vm);
+		dma_fence_get(fence);
+		return fence;
+	}
+
+	/* Create composite fence */
+	fences = kmalloc_array(num_in_fence + 1, sizeof(*fences), GFP_KERNEL);
+	if (!fences)
+		return ERR_PTR(-ENOMEM);
+	for (i = 0; i < num_sync; ++i) {
+		if (sync[i].fence) {
+			dma_fence_get(sync[i].fence);
+			fences[current_fence++] = sync[i].fence;
+		}
+	}
+	fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm);
+	dma_fence_get(fences[current_fence - 1]);
+	cf = dma_fence_array_create(num_in_fence, fences,
+				    vm->composite_fence_ctx,
+				    vm->composite_fence_seqno++,
+				    false);
+	if (!cf) {
+		--vm->composite_fence_seqno;
+		goto err_out;
+	}
+
+	return &cf->base;
+
+err_out:
+	while (current_fence)
+		dma_fence_put(fences[--current_fence]);
+	kfree(fences);
+	kfree(cf);
+
+	return ERR_PTR(-ENOMEM);
+}
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
new file mode 100644
index 000000000000..d284afbe917c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_SYNC_H_
+#define _XE_SYNC_H_
+
+#include "xe_sync_types.h"
+
+struct xe_device;
+struct xe_exec_queue;
+struct xe_file;
+struct xe_sched_job;
+struct xe_vm;
+
+#define SYNC_PARSE_FLAG_EXEC			BIT(0)
+#define SYNC_PARSE_FLAG_LR_MODE			BIT(1)
+#define SYNC_PARSE_FLAG_DISALLOW_USER_FENCE	BIT(2)
+
+int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
+			struct xe_sync_entry *sync,
+			struct drm_xe_sync __user *sync_user,
+			unsigned int flags);
+int xe_sync_entry_wait(struct xe_sync_entry *sync);
+int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
+			   struct xe_sched_job *job);
+void xe_sync_entry_signal(struct xe_sync_entry *sync,
+			  struct xe_sched_job *job,
+			  struct dma_fence *fence);
+void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
+struct dma_fence *
+xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
+		     struct xe_exec_queue *q, struct xe_vm *vm);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h
new file mode 100644
index 000000000000..852db5e7884f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_SYNC_TYPES_H_
+#define _XE_SYNC_TYPES_H_
+
+#include <linux/types.h>
+
+struct drm_syncobj;
+struct dma_fence;
+struct dma_fence_chain;
+struct drm_xe_sync;
+struct user_fence;
+
+struct xe_sync_entry {
+	struct drm_syncobj *syncobj;
+	struct dma_fence *fence;
+	struct dma_fence_chain *chain_fence;
+	struct user_fence *ufence;
+	u64 addr;
+	u64 timeline_value;
+	u32 type;
+	u32 flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
new file mode 100644
index 000000000000..044c20881de7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_migrate.h"
+#include "xe_sa.h"
+#include "xe_tile.h"
+#include "xe_tile_sysfs.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_wa.h"
+
+/**
+ * DOC: Multi-tile Design
+ *
+ * Different vendors use the term "tile" a bit differently, but in the Intel
+ * world, a 'tile' is pretty close to what most people would think of as being
+ * a complete GPU.  When multiple GPUs are placed behind a single PCI device,
+ * that's what is referred to as a "multi-tile device."  In such cases, pretty
+ * much all hardware is replicated per-tile, although certain responsibilities
+ * like PCI communication, reporting of interrupts to the OS, etc. are handled
+ * solely by the "root tile."  A multi-tile platform takes care of tying the
+ * tiles together in a way such that interrupt notifications from remote tiles
+ * are forwarded to the root tile, the per-tile vram is combined into a single
+ * address space, etc.
+ *
+ * In contrast, a "GT" (which officially stands for "Graphics Technology") is
+ * the subset of a GPU/tile that is responsible for implementing graphics
+ * and/or media operations.  The GT is where a lot of the driver implementation
+ * happens since it's where the hardware engines, the execution units, and the
+ * GuC all reside.
+ *
+ * Historically most Intel devices were single-tile devices that contained a
+ * single GT.  PVC is an example of an Intel platform built on a multi-tile
+ * design (i.e., multiple GPUs behind a single PCI device); each PVC tile only
+ * has a single GT.  In contrast, platforms like MTL that have separate chips
+ * for render and media IP are still only a single logical GPU, but the
+ * graphics and media IP blocks are each exposed as a separate GT within that
+ * single GPU.  This is important from a software perspective because multi-GT
+ * platforms like MTL only replicate a subset of the GPU hardware and behave
+ * differently than multi-tile platforms like PVC where nearly everything is
+ * replicated.
+ *
+ * Per-tile functionality (shared by all GTs within the tile):
+ *  - Complete 4MB MMIO space (containing SGunit/SoC registers, GT
+ *    registers, display registers, etc.)
+ *  - Global GTT
+ *  - VRAM (if discrete)
+ *  - Interrupt flows
+ *  - Migration context
+ *  - kernel batchbuffer pool
+ *  - Primary GT
+ *  - Media GT (if media version >= 13)
+ *
+ * Per-GT functionality:
+ *  - GuC
+ *  - Hardware engines
+ *  - Programmable hardware units (subslices, EUs)
+ *  - GSI subset of registers (multiple copies of these registers reside
+ *    within the complete MMIO space provided by the tile, but at different
+ *    offsets --- 0 for render, 0x380000 for media)
+ *  - Multicast register steering
+ *  - TLBs to cache page table translations
+ *  - Reset capability
+ *  - Low-level power management (e.g., C6)
+ *  - Clock frequency
+ *  - MOCS and PAT programming
+ */
+
+/**
+ * xe_tile_alloc - Perform per-tile memory allocation
+ * @tile: Tile to perform allocations for
+ *
+ * Allocates various per-tile data structures using DRM-managed allocations.
+ * Does not touch the hardware.
+ *
+ * Returns -ENOMEM if allocations fail, otherwise 0.
+ */
+static int xe_tile_alloc(struct xe_tile *tile)
+{
+	struct drm_device *drm = &tile_to_xe(tile)->drm;
+
+	tile->mem.ggtt = drmm_kzalloc(drm, sizeof(*tile->mem.ggtt),
+				      GFP_KERNEL);
+	if (!tile->mem.ggtt)
+		return -ENOMEM;
+	tile->mem.ggtt->tile = tile;
+
+	tile->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*tile->mem.vram_mgr), GFP_KERNEL);
+	if (!tile->mem.vram_mgr)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * xe_tile_init_early - Initialize the tile and primary GT
+ * @tile: Tile to initialize
+ * @xe: Parent Xe device
+ * @id: Tile ID
+ *
+ * Initializes per-tile resources that don't require any interactions with the
+ * hardware or any knowledge about the Graphics/Media IP version.
+ *
+ * Returns: 0 on success, negative error code on error.
+ */
+int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id)
+{
+	int err;
+
+	tile->xe = xe;
+	tile->id = id;
+
+	err = xe_tile_alloc(tile);
+	if (err)
+		return err;
+
+	tile->primary_gt = xe_gt_alloc(tile);
+	if (IS_ERR(tile->primary_gt))
+		return PTR_ERR(tile->primary_gt);
+
+	return 0;
+}
+
+static int tile_ttm_mgr_init(struct xe_tile *tile)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	int err;
+
+	if (tile->mem.vram.usable_size) {
+		err = xe_ttm_vram_mgr_init(tile, tile->mem.vram_mgr);
+		if (err)
+			return err;
+		xe->info.mem_region_mask |= BIT(tile->id) << 1;
+	}
+
+	return 0;
+}
+
+/**
+ * xe_tile_init_noalloc - Init tile up to the point where allocations can happen.
+ * @tile: The tile to initialize.
+ *
+ * This function prepares the tile to allow memory allocations to VRAM, but is
+ * not allowed to allocate memory itself. This state is useful for display
+ * readout, because the inherited display framebuffer will otherwise be
+ * overwritten as it is usually put at the start of VRAM.
+ *
+ * Note that since this is tile initialization, it should not perform any
+ * GT-specific operations, and thus does not need to hold GT forcewake.
+ *
+ * Returns: 0 on success, negative error code on error.
+ */
+int xe_tile_init_noalloc(struct xe_tile *tile)
+{
+	int err;
+
+	xe_device_mem_access_get(tile_to_xe(tile));
+
+	err = tile_ttm_mgr_init(tile);
+	if (err)
+		goto err_mem_access;
+
+	tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16);
+	if (IS_ERR(tile->mem.kernel_bb_pool))
+		err = PTR_ERR(tile->mem.kernel_bb_pool);
+
+	xe_wa_apply_tile_workarounds(tile);
+
+	xe_tile_sysfs_init(tile);
+
+err_mem_access:
+	xe_device_mem_access_put(tile_to_xe(tile));
+	return err;
+}
+
+void xe_tile_migrate_wait(struct xe_tile *tile)
+{
+	xe_migrate_wait(tile->migrate);
+}
diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h
new file mode 100644
index 000000000000..1c9e42ade6b0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TILE_H_
+#define _XE_TILE_H_
+
+#include "xe_device_types.h"
+
+struct xe_tile;
+
+int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id);
+int xe_tile_init_noalloc(struct xe_tile *tile);
+
+void xe_tile_migrate_wait(struct xe_tile *tile);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c
new file mode 100644
index 000000000000..0f8d3e7fce46
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_tile.h"
+#include "xe_tile_sysfs.h"
+
+static void xe_tile_sysfs_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static const struct kobj_type xe_tile_sysfs_kobj_type = {
+	.release = xe_tile_sysfs_kobj_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void tile_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_tile *tile = arg;
+
+	kobject_put(tile->sysfs);
+}
+
+void xe_tile_sysfs_init(struct xe_tile *tile)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct device *dev = xe->drm.dev;
+	struct kobj_tile *kt;
+	int err;
+
+	kt = kzalloc(sizeof(*kt), GFP_KERNEL);
+	if (!kt)
+		return;
+
+	kobject_init(&kt->base, &xe_tile_sysfs_kobj_type);
+	kt->tile = tile;
+
+	err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id);
+	if (err) {
+		kobject_put(&kt->base);
+		drm_warn(&xe->drm, "failed to register TILE sysfs directory, err: %d\n", err);
+		return;
+	}
+
+	tile->sysfs = &kt->base;
+
+	err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile);
+	if (err)
+		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+			 __func__, err);
+}
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.h b/drivers/gpu/drm/xe/xe_tile_sysfs.h
new file mode 100644
index 000000000000..e4f065039eba
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TILE_SYSFS_H_
+#define _XE_TILE_SYSFS_H_
+
+#include "xe_tile_sysfs_types.h"
+
+void xe_tile_sysfs_init(struct xe_tile *tile);
+
+static inline struct xe_tile *
+kobj_to_tile(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_tile, base)->tile;
+}
+
+#endif /* _XE_TILE_SYSFS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs_types.h b/drivers/gpu/drm/xe/xe_tile_sysfs_types.h
new file mode 100644
index 000000000000..75906ba11a9e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs_types.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TILE_SYSFS_TYPES_H_
+#define _XE_TILE_SYSFS_TYPES_H_
+
+#include <linux/kobject.h>
+
+struct xe_tile;
+
+/**
+ * struct kobj_tile - A tile's kobject struct that connects the kobject
+ * and the TILE
+ *
+ * When dealing with multiple TILEs, this struct helps to understand which
+ * TILE needs to be addressed on a given sysfs call.
+ */
+struct kobj_tile {
+	/** @base: The actual kobject */
+	struct kobject base;
+	/** @tile: A pointer to the tile itself */
+	struct xe_tile *tile;
+};
+
+#endif	/* _XE_TILE_SYSFS_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_trace.c b/drivers/gpu/drm/xe/xe_trace.c
new file mode 100644
index 000000000000..2527c556bff1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "xe_trace.h"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
new file mode 100644
index 000000000000..95163c303f3e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -0,0 +1,608 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM xe
+
+#if !defined(_XE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _XE_TRACE_H_
+
+#include <linux/tracepoint.h>
+#include <linux/types.h>
+
+#include "xe_bo_types.h"
+#include "xe_exec_queue_types.h"
+#include "xe_gpu_scheduler_types.h"
+#include "xe_gt_tlb_invalidation_types.h"
+#include "xe_gt_types.h"
+#include "xe_guc_exec_queue_types.h"
+#include "xe_sched_job.h"
+#include "xe_vm.h"
+
+DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
+		    TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+		    TP_ARGS(fence),
+
+		    TP_STRUCT__entry(
+			     __field(u64, fence)
+			     __field(int, seqno)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->fence = (u64)fence;
+			   __entry->seqno = fence->seqno;
+			   ),
+
+		    TP_printk("fence=0x%016llx, seqno=%d",
+			      __entry->fence, __entry->seqno)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_create,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence,
+	     xe_gt_tlb_invalidation_fence_work_func,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_cb,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_send,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_recv,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout,
+	     TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DECLARE_EVENT_CLASS(xe_bo,
+		    TP_PROTO(struct xe_bo *bo),
+		    TP_ARGS(bo),
+
+		    TP_STRUCT__entry(
+			     __field(size_t, size)
+			     __field(u32, flags)
+			     __field(u64, vm)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->size = bo->size;
+			   __entry->flags = bo->flags;
+			   __entry->vm = (unsigned long)bo->vm;
+			   ),
+
+		    TP_printk("size=%zu, flags=0x%02x, vm=0x%016llx",
+			      __entry->size, __entry->flags, __entry->vm)
+);
+
+DEFINE_EVENT(xe_bo, xe_bo_cpu_fault,
+	     TP_PROTO(struct xe_bo *bo),
+	     TP_ARGS(bo)
+);
+
+DEFINE_EVENT(xe_bo, xe_bo_move,
+	     TP_PROTO(struct xe_bo *bo),
+	     TP_ARGS(bo)
+);
+
+DECLARE_EVENT_CLASS(xe_exec_queue,
+		    TP_PROTO(struct xe_exec_queue *q),
+		    TP_ARGS(q),
+
+		    TP_STRUCT__entry(
+			     __field(enum xe_engine_class, class)
+			     __field(u32, logical_mask)
+			     __field(u8, gt_id)
+			     __field(u16, width)
+			     __field(u16, guc_id)
+			     __field(u32, guc_state)
+			     __field(u32, flags)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->class = q->class;
+			   __entry->logical_mask = q->logical_mask;
+			   __entry->gt_id = q->gt->info.id;
+			   __entry->width = q->width;
+			   __entry->guc_id = q->guc->id;
+			   __entry->guc_state = atomic_read(&q->guc->state);
+			   __entry->flags = q->flags;
+			   ),
+
+		    TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x",
+			      __entry->class, __entry->logical_mask,
+			      __entry->gt_id, __entry->width, __entry->guc_id,
+			      __entry->guc_state, __entry->flags)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_create,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_supress_resume,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_submit,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_enable,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_disable,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_done,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_register,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_deregister,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_deregister_done,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_close,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_kill,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_cleanup_entity,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_destroy,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_reset,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_memory_cat_error,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_stop,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_resubmit,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_lr_cleanup,
+	     TP_PROTO(struct xe_exec_queue *q),
+	     TP_ARGS(q)
+);
+
+DECLARE_EVENT_CLASS(xe_sched_job,
+		    TP_PROTO(struct xe_sched_job *job),
+		    TP_ARGS(job),
+
+		    TP_STRUCT__entry(
+			     __field(u32, seqno)
+			     __field(u16, guc_id)
+			     __field(u32, guc_state)
+			     __field(u32, flags)
+			     __field(int, error)
+			     __field(u64, fence)
+			     __field(u64, batch_addr)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->seqno = xe_sched_job_seqno(job);
+			   __entry->guc_id = job->q->guc->id;
+			   __entry->guc_state =
+			   atomic_read(&job->q->guc->state);
+			   __entry->flags = job->q->flags;
+			   __entry->error = job->fence->error;
+			   __entry->fence = (unsigned long)job->fence;
+			   __entry->batch_addr = (u64)job->batch_addr[0];
+			   ),
+
+		    TP_printk("fence=0x%016llx, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
+			      __entry->fence, __entry->seqno, __entry->guc_id,
+			      __entry->batch_addr, __entry->guc_state,
+			      __entry->flags, __entry->error)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_create,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_exec,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_run,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_free,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_timedout,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_set_error,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_ban,
+	     TP_PROTO(struct xe_sched_job *job),
+	     TP_ARGS(job)
+);
+
+DECLARE_EVENT_CLASS(xe_sched_msg,
+		    TP_PROTO(struct xe_sched_msg *msg),
+		    TP_ARGS(msg),
+
+		    TP_STRUCT__entry(
+			     __field(u32, opcode)
+			     __field(u16, guc_id)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->opcode = msg->opcode;
+			   __entry->guc_id =
+			   ((struct xe_exec_queue *)msg->private_data)->guc->id;
+			   ),
+
+		    TP_printk("guc_id=%d, opcode=%u", __entry->guc_id,
+			      __entry->opcode)
+);
+
+DEFINE_EVENT(xe_sched_msg, xe_sched_msg_add,
+	     TP_PROTO(struct xe_sched_msg *msg),
+	     TP_ARGS(msg)
+);
+
+DEFINE_EVENT(xe_sched_msg, xe_sched_msg_recv,
+	     TP_PROTO(struct xe_sched_msg *msg),
+	     TP_ARGS(msg)
+);
+
+DECLARE_EVENT_CLASS(xe_hw_fence,
+		    TP_PROTO(struct xe_hw_fence *fence),
+		    TP_ARGS(fence),
+
+		    TP_STRUCT__entry(
+			     __field(u64, ctx)
+			     __field(u32, seqno)
+			     __field(u64, fence)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->ctx = fence->dma.context;
+			   __entry->seqno = fence->dma.seqno;
+			   __entry->fence = (unsigned long)fence;
+			   ),
+
+		    TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u",
+			      __entry->ctx, __entry->fence, __entry->seqno)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_create,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_signal,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_try_signal,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_free,
+	     TP_PROTO(struct xe_hw_fence *fence),
+	     TP_ARGS(fence)
+);
+
+DECLARE_EVENT_CLASS(xe_vma,
+		    TP_PROTO(struct xe_vma *vma),
+		    TP_ARGS(vma),
+
+		    TP_STRUCT__entry(
+			     __field(u64, vma)
+			     __field(u32, asid)
+			     __field(u64, start)
+			     __field(u64, end)
+			     __field(u64, ptr)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->vma = (unsigned long)vma;
+			   __entry->asid = xe_vma_vm(vma)->usm.asid;
+			   __entry->start = xe_vma_start(vma);
+			   __entry->end = xe_vma_end(vma) - 1;
+			   __entry->ptr = xe_vma_userptr(vma);
+			   ),
+
+		    TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,",
+			      __entry->vma, __entry->asid, __entry->start,
+			      __entry->end, __entry->ptr)
+)
+
+DEFINE_EVENT(xe_vma, xe_vma_flush,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_pagefault,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_acc,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_fail,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_bind,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_pf_bind,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_unbind,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_rebind_worker,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_rebind_exec,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_usm_invalidate,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_evict,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete,
+	     TP_PROTO(struct xe_vma *vma),
+	     TP_ARGS(vma)
+);
+
+DECLARE_EVENT_CLASS(xe_vm,
+		    TP_PROTO(struct xe_vm *vm),
+		    TP_ARGS(vm),
+
+		    TP_STRUCT__entry(
+			     __field(u64, vm)
+			     __field(u32, asid)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->vm = (unsigned long)vm;
+			   __entry->asid = vm->usm.asid;
+			   ),
+
+		    TP_printk("vm=0x%016llx, asid=0x%05x",  __entry->vm,
+			      __entry->asid)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_kill,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_create,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_free,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_cpu_bind,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_restart,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit,
+	     TP_PROTO(struct xe_vm *vm),
+	     TP_ARGS(vm)
+);
+
+/* GuC */
+DECLARE_EVENT_CLASS(xe_guc_ct_flow_control,
+		    TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+		    TP_ARGS(_head, _tail, size, space, len),
+
+		    TP_STRUCT__entry(
+			     __field(u32, _head)
+			     __field(u32, _tail)
+			     __field(u32, size)
+			     __field(u32, space)
+			     __field(u32, len)
+			     ),
+
+		    TP_fast_assign(
+			   __entry->_head = _head;
+			   __entry->_tail = _tail;
+			   __entry->size = size;
+			   __entry->space = space;
+			   __entry->len = len;
+			   ),
+
+		    TP_printk("h2g flow control: head=%u, tail=%u, size=%u, space=%u, len=%u",
+			      __entry->_head, __entry->_tail, __entry->size,
+			      __entry->space, __entry->len)
+);
+
+DEFINE_EVENT(xe_guc_ct_flow_control, xe_guc_ct_h2g_flow_control,
+	     TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+	     TP_ARGS(_head, _tail, size, space, len)
+);
+
+DEFINE_EVENT_PRINT(xe_guc_ct_flow_control, xe_guc_ct_g2h_flow_control,
+		   TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+		   TP_ARGS(_head, _tail, size, space, len),
+
+		   TP_printk("g2h flow control: head=%u, tail=%u, size=%u, space=%u, len=%u",
+			     __entry->_head, __entry->_tail, __entry->size,
+			     __entry->space, __entry->len)
+);
+
+DECLARE_EVENT_CLASS(xe_guc_ctb,
+		    TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail),
+		    TP_ARGS(gt_id, action, len, _head, tail),
+
+		    TP_STRUCT__entry(
+				__field(u8, gt_id)
+				__field(u32, action)
+				__field(u32, len)
+				__field(u32, tail)
+				__field(u32, _head)
+		    ),
+
+		    TP_fast_assign(
+			    __entry->gt_id = gt_id;
+			    __entry->action = action;
+			    __entry->len = len;
+			    __entry->tail = tail;
+			    __entry->_head = _head;
+		    ),
+
+		    TP_printk("gt%d: H2G CTB: action=0x%x, len=%d, tail=%d, head=%d\n",
+			      __entry->gt_id, __entry->action, __entry->len,
+			      __entry->tail, __entry->_head)
+);
+
+DEFINE_EVENT(xe_guc_ctb, xe_guc_ctb_h2g,
+	     TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail),
+	     TP_ARGS(gt_id, action, len, _head, tail)
+);
+
+DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h,
+		   TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail),
+		   TP_ARGS(gt_id, action, len, _head, tail),
+
+		   TP_printk("gt%d: G2H CTB: action=0x%x, len=%d, tail=%d, head=%d\n",
+			     __entry->gt_id, __entry->action, __entry->len,
+			     __entry->tail, __entry->_head)
+
+);
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe
+#define TRACE_INCLUDE_FILE xe_trace
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
new file mode 100644
index 000000000000..d2b00d0bf1e2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2023 Intel Corporation
+ * Copyright (C) 2021-2002 Red Hat
+ */
+
+#include <drm/drm_managed.h>
+#include <drm/drm_mm.h>
+
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+
+#include "generated/xe_wa_oob.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "xe_res_cursor.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_wa.h"
+
+struct xe_ttm_stolen_mgr {
+	struct xe_ttm_vram_mgr base;
+
+	/* PCI base offset */
+	resource_size_t io_base;
+	/* GPU base offset */
+	resource_size_t stolen_base;
+
+	void *__iomem mapping;
+};
+
+static inline struct xe_ttm_stolen_mgr *
+to_stolen_mgr(struct ttm_resource_manager *man)
+{
+	return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
+}
+
+/**
+ * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access
+ * stolen, can we then fallback to mapping through the GGTT.
+ * @xe: xe device
+ *
+ * Some older integrated platforms don't support reliable CPU access for stolen,
+ * however on such hardware we can always use the mappable part of the GGTT for
+ * CPU access. Check if that's the case for this device.
+ */
+bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
+{
+	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
+}
+
+static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
+{
+	struct xe_tile *tile = xe_device_get_root_tile(xe);
+	struct xe_gt *mmio = xe_root_mmio_gt(xe);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	u64 stolen_size;
+	u64 tile_offset;
+	u64 tile_size;
+
+	tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
+	tile_size = tile->mem.vram.actual_physical_size;
+
+	/* Use DSM base address instead for stolen memory */
+	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
+	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
+		return 0;
+
+	stolen_size = tile_size - mgr->stolen_base;
+
+	/* Verify usage fits in the actual resource available */
+	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
+		mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;
+
+	/*
+	 * There may be few KB of platform dependent reserved memory at the end
+	 * of vram which is not part of the DSM. Such reserved memory portion is
+	 * always less then DSM granularity so align down the stolen_size to DSM
+	 * granularity to accommodate such reserve vram portion.
+	 */
+	return ALIGN_DOWN(stolen_size, SZ_1M);
+}
+
+static u32 get_wopcm_size(struct xe_device *xe)
+{
+	u32 wopcm_size;
+	u64 val;
+
+	val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED);
+	val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);
+
+	switch (val) {
+	case 0x5 ... 0x6:
+		val--;
+		fallthrough;
+	case 0x0 ... 0x3:
+		wopcm_size = (1U << val) * SZ_1M;
+		break;
+	default:
+		WARN(1, "Missing case wopcm_size=%llx\n", val);
+		wopcm_size = 0;
+	}
+
+	return wopcm_size;
+}
+
+static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
+	u32 stolen_size, wopcm_size;
+	u32 ggc, gms;
+
+	ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);
+
+	/*
+	 * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
+	 * GTT size
+	 */
+	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
+		return 0;
+
+	/*
+	 * Graphics >= 1270 uses the offset to the GSMBASE as address in the
+	 * PTEs, together with the DM flag being set. Previously there was no
+	 * such flag so the address was the io_base.
+	 *
+	 * DSMBASE = GSMBASE + 8MB
+	 */
+	mgr->stolen_base = SZ_8M;
+	mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;
+
+	/* return valid GMS value, -EIO if invalid */
+	gms = REG_FIELD_GET(GMS_MASK, ggc);
+	switch (gms) {
+	case 0x0 ... 0x04:
+		stolen_size = gms * 32 * SZ_1M;
+		break;
+	case 0xf0 ... 0xfe:
+		stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
+		break;
+	default:
+		return 0;
+	}
+
+	/* Carve out the top of DSM as it contains the reserved WOPCM region */
+	wopcm_size = get_wopcm_size(xe);
+	if (drm_WARN_ON(&xe->drm, !wopcm_size))
+		return 0;
+
+	stolen_size -= wopcm_size;
+
+	if (media_gt && XE_WA(media_gt, 14019821291)) {
+		u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE)
+			& ~GENMASK_ULL(5, 0);
+
+		/*
+		 * This workaround is primarily implemented by the BIOS.  We
+		 * just need to figure out whether the BIOS has applied the
+		 * workaround (meaning the programmed address falls within
+		 * the DSM) and, if so, reserve that part of the DSM to
+		 * prevent accidental reuse.  The DSM location should be just
+		 * below the WOPCM.
+		 */
+		if (gscpsmi_base >= mgr->io_base &&
+		    gscpsmi_base < mgr->io_base + stolen_size) {
+			xe_gt_dbg(media_gt,
+				  "Reserving %llu bytes of DSM for Wa_14019821291\n",
+				  mgr->io_base + stolen_size - gscpsmi_base);
+			stolen_size = gscpsmi_base - mgr->io_base;
+		}
+	}
+
+	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
+		return 0;
+
+	return stolen_size;
+}
+
+extern struct resource intel_graphics_stolen_res;
+
+static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
+{
+#ifdef CONFIG_X86
+	/* Map into GGTT */
+	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);
+
+	/* Stolen memory is x86 only */
+	mgr->stolen_base = intel_graphics_stolen_res.start;
+	return resource_size(&intel_graphics_stolen_res);
+#else
+	return 0;
+#endif
+}
+
+void xe_ttm_stolen_mgr_init(struct xe_device *xe)
+{
+	struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	u64 stolen_size, io_size, pgsize;
+	int err;
+
+	if (IS_DGFX(xe))
+		stolen_size = detect_bar2_dgfx(xe, mgr);
+	else if (GRAPHICS_VERx100(xe) >= 1270)
+		stolen_size = detect_bar2_integrated(xe, mgr);
+	else
+		stolen_size = detect_stolen(xe, mgr);
+
+	if (!stolen_size) {
+		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
+		return;
+	}
+
+	pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+	if (pgsize < PAGE_SIZE)
+		pgsize = PAGE_SIZE;
+
+	/*
+	 * We don't try to attempt partial visible support for stolen vram,
+	 * since stolen is always at the end of vram, and the BAR size is pretty
+	 * much always 256M, with small-bar.
+	 */
+	io_size = 0;
+	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
+		io_size = stolen_size;
+
+	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
+				     io_size, pgsize);
+	if (err) {
+		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
+		return;
+	}
+
+	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
+		    stolen_size);
+
+	if (io_size)
+		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);
+}
+
+u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
+	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
+	struct xe_res_cursor cur;
+
+	XE_WARN_ON(!mgr->io_base);
+
+	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
+		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;
+
+	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
+	return mgr->io_base + cur.start;
+}
+
+static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
+					       struct xe_ttm_stolen_mgr *mgr,
+					       struct ttm_resource *mem)
+{
+	struct xe_res_cursor cur;
+
+	if (!mgr->io_base)
+		return -EIO;
+
+	xe_res_first(mem, 0, 4096, &cur);
+	mem->bus.offset = cur.start;
+
+	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));
+
+	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
+		mem->bus.addr = (u8 *)mgr->mapping + mem->bus.offset;
+
+	mem->bus.offset += mgr->io_base;
+	mem->bus.is_iomem = true;
+	mem->bus.caching = ttm_write_combined;
+
+	return 0;
+}
+
+static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
+						 struct xe_ttm_stolen_mgr *mgr,
+						 struct ttm_resource *mem)
+{
+#ifdef CONFIG_X86
+	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);
+
+	XE_WARN_ON(IS_DGFX(xe));
+
+	/* XXX: Require BO to be mapped to GGTT? */
+	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT)))
+		return -EIO;
+
+	/* GGTT is always contiguously mapped */
+	mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;
+
+	mem->bus.is_iomem = true;
+	mem->bus.caching = ttm_write_combined;
+
+	return 0;
+#else
+	/* How is it even possible to get here without gen12 stolen? */
+	drm_WARN_ON(&xe->drm, 1);
+	return -EIO;
+#endif
+}
+
+int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
+{
+	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
+	struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;
+
+	if (!mgr || !mgr->io_base)
+		return -EIO;
+
+	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
+		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
+	else
+		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
+}
+
+u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
+{
+	struct xe_ttm_stolen_mgr *mgr =
+		to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));
+
+	return mgr->stolen_base;
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
new file mode 100644
index 000000000000..1777245ff810
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_STOLEN_MGR_H_
+#define _XE_TTM_STOLEN_MGR_H_
+
+#include <linux/types.h>
+
+struct ttm_resource;
+struct xe_bo;
+struct xe_device;
+
+void xe_ttm_stolen_mgr_init(struct xe_device *xe);
+int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem);
+bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe);
+u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset);
+u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
new file mode 100644
index 000000000000..3e1fa0c832ca
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ * Copyright (C) 2021-2002 Red Hat
+ */
+
+#include "xe_ttm_sys_mgr.h"
+
+#include <drm/drm_managed.h>
+
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+
+struct xe_ttm_sys_node {
+	struct ttm_buffer_object *tbo;
+	struct ttm_range_mgr_node base;
+};
+
+static inline struct xe_ttm_sys_node *
+to_xe_ttm_sys_node(struct ttm_resource *res)
+{
+	return container_of(res, struct xe_ttm_sys_node, base.base);
+}
+
+static int xe_ttm_sys_mgr_new(struct ttm_resource_manager *man,
+			      struct ttm_buffer_object *tbo,
+			      const struct ttm_place *place,
+			      struct ttm_resource **res)
+{
+	struct xe_ttm_sys_node *node;
+	int r;
+
+	node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	node->tbo = tbo;
+	ttm_resource_init(tbo, place, &node->base.base);
+
+	if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
+	    ttm_resource_manager_usage(man) > (man->size << PAGE_SHIFT)) {
+		r = -ENOSPC;
+		goto err_fini;
+	}
+
+	node->base.mm_nodes[0].start = 0;
+	node->base.mm_nodes[0].size = PFN_UP(node->base.base.size);
+	node->base.base.start = XE_BO_INVALID_OFFSET;
+
+	*res = &node->base.base;
+
+	return 0;
+
+err_fini:
+	ttm_resource_fini(man, &node->base.base);
+	kfree(node);
+	return r;
+}
+
+static void xe_ttm_sys_mgr_del(struct ttm_resource_manager *man,
+			       struct ttm_resource *res)
+{
+	struct xe_ttm_sys_node *node = to_xe_ttm_sys_node(res);
+
+	ttm_resource_fini(man, res);
+	kfree(node);
+}
+
+static void xe_ttm_sys_mgr_debug(struct ttm_resource_manager *man,
+				 struct drm_printer *printer)
+{
+
+}
+
+static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = {
+	.alloc = xe_ttm_sys_mgr_new,
+	.free = xe_ttm_sys_mgr_del,
+	.debug = xe_ttm_sys_mgr_debug
+};
+
+static void ttm_sys_mgr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = (struct xe_device *)arg;
+	struct ttm_resource_manager *man = &xe->mem.sys_mgr;
+	int err;
+
+	ttm_resource_manager_set_used(man, false);
+
+	err = ttm_resource_manager_evict_all(&xe->ttm, man);
+	if (err)
+		return;
+
+	ttm_resource_manager_cleanup(man);
+	ttm_set_driver_manager(&xe->ttm, XE_PL_TT, NULL);
+}
+
+int xe_ttm_sys_mgr_init(struct xe_device *xe)
+{
+	struct ttm_resource_manager *man = &xe->mem.sys_mgr;
+	struct sysinfo si;
+	u64 gtt_size;
+
+	si_meminfo(&si);
+	gtt_size = (u64)si.totalram * si.mem_unit;
+	/* TTM limits allocation of all TTM devices by 50% of system memory */
+	gtt_size /= 2;
+
+	man->use_tt = true;
+	man->func = &xe_ttm_sys_mgr_func;
+	ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT);
+	ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man);
+	ttm_resource_manager_set_used(man, true);
+	return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe);
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h
new file mode 100644
index 000000000000..e8f5cd395b28
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TTM_SYS_MGR_H_
+#define _XE_TTM_SYS_MGR_H_
+
+struct xe_device;
+
+int xe_ttm_sys_mgr_init(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
new file mode 100644
index 000000000000..115ec745e502
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ * Copyright (C) 2021-2002 Red Hat
+ */
+
+#include <drm/drm_managed.h>
+
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_res_cursor.h"
+#include "xe_ttm_vram_mgr.h"
+
+static inline struct drm_buddy_block *
+xe_ttm_vram_mgr_first_block(struct list_head *list)
+{
+	return list_first_entry_or_null(list, struct drm_buddy_block, link);
+}
+
+static inline bool xe_is_vram_mgr_blocks_contiguous(struct drm_buddy *mm,
+						    struct list_head *head)
+{
+	struct drm_buddy_block *block;
+	u64 start, size;
+
+	block = xe_ttm_vram_mgr_first_block(head);
+	if (!block)
+		return false;
+
+	while (head != block->link.next) {
+		start = drm_buddy_block_offset(block);
+		size = drm_buddy_block_size(mm, block);
+
+		block = list_entry(block->link.next, struct drm_buddy_block,
+				   link);
+		if (start + size != drm_buddy_block_offset(block))
+			return false;
+	}
+
+	return true;
+}
+
+static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
+			       struct ttm_buffer_object *tbo,
+			       const struct ttm_place *place,
+			       struct ttm_resource **res)
+{
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+	struct xe_ttm_vram_mgr_resource *vres;
+	struct drm_buddy *mm = &mgr->mm;
+	u64 size, remaining_size, min_page_size;
+	unsigned long lpfn;
+	int err;
+
+	lpfn = place->lpfn;
+	if (!lpfn || lpfn > man->size >> PAGE_SHIFT)
+		lpfn = man->size >> PAGE_SHIFT;
+
+	if (tbo->base.size >> PAGE_SHIFT > (lpfn - place->fpfn))
+		return -E2BIG; /* don't trigger eviction for the impossible */
+
+	vres = kzalloc(sizeof(*vres), GFP_KERNEL);
+	if (!vres)
+		return -ENOMEM;
+
+	ttm_resource_init(tbo, place, &vres->base);
+
+	/* bail out quickly if there's likely not enough VRAM for this BO */
+	if (ttm_resource_manager_usage(man) > man->size) {
+		err = -ENOSPC;
+		goto error_fini;
+	}
+
+	INIT_LIST_HEAD(&vres->blocks);
+
+	if (place->flags & TTM_PL_FLAG_TOPDOWN)
+		vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
+	if (place->fpfn || lpfn != man->size >> PAGE_SHIFT)
+		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
+	if (WARN_ON(!vres->base.size)) {
+		err = -EINVAL;
+		goto error_fini;
+	}
+	size = vres->base.size;
+
+	min_page_size = mgr->default_page_size;
+	if (tbo->page_alignment)
+		min_page_size = tbo->page_alignment << PAGE_SHIFT;
+
+	if (WARN_ON(min_page_size < mm->chunk_size)) {
+		err = -EINVAL;
+		goto error_fini;
+	}
+
+	if (WARN_ON(min_page_size > SZ_2G)) { /* FIXME: sg limit */
+		err = -EINVAL;
+		goto error_fini;
+	}
+
+	if (WARN_ON((size > SZ_2G &&
+		     (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS)))) {
+		err = -EINVAL;
+		goto error_fini;
+	}
+
+	if (WARN_ON(!IS_ALIGNED(size, min_page_size))) {
+		err = -EINVAL;
+		goto error_fini;
+	}
+
+	mutex_lock(&mgr->lock);
+	if (lpfn <= mgr->visible_size >> PAGE_SHIFT && size > mgr->visible_avail) {
+		mutex_unlock(&mgr->lock);
+		err = -ENOSPC;
+		goto error_fini;
+	}
+
+	if (place->fpfn + (size >> PAGE_SHIFT) != place->lpfn &&
+	    place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		size = roundup_pow_of_two(size);
+		min_page_size = size;
+
+		lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn);
+	}
+
+	remaining_size = size;
+	do {
+		/*
+		 * Limit maximum size to 2GiB due to SG table limitations.
+		 * FIXME: Should maybe be handled as part of sg construction.
+		 */
+		u64 alloc_size = min_t(u64, remaining_size, SZ_2G);
+
+		err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+					     (u64)lpfn << PAGE_SHIFT,
+					     alloc_size,
+					     min_page_size,
+					     &vres->blocks,
+					     vres->flags);
+		if (err)
+			goto error_free_blocks;
+
+		remaining_size -= alloc_size;
+	} while (remaining_size);
+
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks))
+			size = vres->base.size;
+	}
+
+	if (lpfn <= mgr->visible_size >> PAGE_SHIFT) {
+		vres->used_visible_size = size;
+	} else {
+		struct drm_buddy_block *block;
+
+		list_for_each_entry(block, &vres->blocks, link) {
+			u64 start = drm_buddy_block_offset(block);
+
+			if (start < mgr->visible_size) {
+				u64 end = start + drm_buddy_block_size(mm, block);
+
+				vres->used_visible_size +=
+					min(end, mgr->visible_size) - start;
+			}
+		}
+	}
+
+	mgr->visible_avail -= vres->used_visible_size;
+	mutex_unlock(&mgr->lock);
+
+	if (!(vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) &&
+	    xe_is_vram_mgr_blocks_contiguous(mm, &vres->blocks))
+		vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
+
+	/*
+	 * For some kernel objects we still rely on the start when io mapping
+	 * the object.
+	 */
+	if (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) {
+		struct drm_buddy_block *block = list_first_entry(&vres->blocks,
+								 typeof(*block),
+								 link);
+
+		vres->base.start = drm_buddy_block_offset(block) >> PAGE_SHIFT;
+	} else {
+		vres->base.start = XE_BO_INVALID_OFFSET;
+	}
+
+	*res = &vres->base;
+	return 0;
+
+error_free_blocks:
+	drm_buddy_free_list(mm, &vres->blocks);
+	mutex_unlock(&mgr->lock);
+error_fini:
+	ttm_resource_fini(man, &vres->base);
+	kfree(vres);
+
+	return err;
+}
+
+static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
+				struct ttm_resource *res)
+{
+	struct xe_ttm_vram_mgr_resource *vres =
+		to_xe_ttm_vram_mgr_resource(res);
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+	struct drm_buddy *mm = &mgr->mm;
+
+	mutex_lock(&mgr->lock);
+	drm_buddy_free_list(mm, &vres->blocks);
+	mgr->visible_avail += vres->used_visible_size;
+	mutex_unlock(&mgr->lock);
+
+	ttm_resource_fini(man, res);
+
+	kfree(vres);
+}
+
+static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man,
+				  struct drm_printer *printer)
+{
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+	struct drm_buddy *mm = &mgr->mm;
+
+	mutex_lock(&mgr->lock);
+	drm_printf(printer, "default_page_size: %lluKiB\n",
+		   mgr->default_page_size >> 10);
+	drm_printf(printer, "visible_avail: %lluMiB\n",
+		   (u64)mgr->visible_avail >> 20);
+	drm_printf(printer, "visible_size: %lluMiB\n",
+		   (u64)mgr->visible_size >> 20);
+
+	drm_buddy_print(mm, printer);
+	mutex_unlock(&mgr->lock);
+	drm_printf(printer, "man size:%llu\n", man->size);
+}
+
+static bool xe_ttm_vram_mgr_intersects(struct ttm_resource_manager *man,
+				       struct ttm_resource *res,
+				       const struct ttm_place *place,
+				       size_t size)
+{
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+	struct xe_ttm_vram_mgr_resource *vres =
+		to_xe_ttm_vram_mgr_resource(res);
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
+
+	if (!place->fpfn && !place->lpfn)
+		return true;
+
+	if (!place->fpfn && place->lpfn == mgr->visible_size >> PAGE_SHIFT)
+		return vres->used_visible_size > 0;
+
+	list_for_each_entry(block, &vres->blocks, link) {
+		unsigned long fpfn =
+			drm_buddy_block_offset(block) >> PAGE_SHIFT;
+		unsigned long lpfn = fpfn +
+			(drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+
+		if (place->fpfn < lpfn && place->lpfn > fpfn)
+			return true;
+	}
+
+	return false;
+}
+
+static bool xe_ttm_vram_mgr_compatible(struct ttm_resource_manager *man,
+				       struct ttm_resource *res,
+				       const struct ttm_place *place,
+				       size_t size)
+{
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+	struct xe_ttm_vram_mgr_resource *vres =
+		to_xe_ttm_vram_mgr_resource(res);
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
+
+	if (!place->fpfn && !place->lpfn)
+		return true;
+
+	if (!place->fpfn && place->lpfn == mgr->visible_size >> PAGE_SHIFT)
+		return vres->used_visible_size == size;
+
+	list_for_each_entry(block, &vres->blocks, link) {
+		unsigned long fpfn =
+			drm_buddy_block_offset(block) >> PAGE_SHIFT;
+		unsigned long lpfn = fpfn +
+			(drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+
+		if (fpfn < place->fpfn || lpfn > place->lpfn)
+			return false;
+	}
+
+	return true;
+}
+
+static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = {
+	.alloc	= xe_ttm_vram_mgr_new,
+	.free	= xe_ttm_vram_mgr_del,
+	.intersects = xe_ttm_vram_mgr_intersects,
+	.compatible = xe_ttm_vram_mgr_compatible,
+	.debug	= xe_ttm_vram_mgr_debug
+};
+
+static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_ttm_vram_mgr *mgr = arg;
+	struct ttm_resource_manager *man = &mgr->manager;
+
+	ttm_resource_manager_set_used(man, false);
+
+	if (ttm_resource_manager_evict_all(&xe->ttm, man))
+		return;
+
+	WARN_ON_ONCE(mgr->visible_avail != mgr->visible_size);
+
+	drm_buddy_fini(&mgr->mm);
+
+	ttm_resource_manager_cleanup(&mgr->manager);
+
+	ttm_set_driver_manager(&xe->ttm, mgr->mem_type, NULL);
+
+	mutex_destroy(&mgr->lock);
+}
+
+int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
+			   u32 mem_type, u64 size, u64 io_size,
+			   u64 default_page_size)
+{
+	struct ttm_resource_manager *man = &mgr->manager;
+	int err;
+
+	man->func = &xe_ttm_vram_mgr_func;
+	mgr->mem_type = mem_type;
+	mutex_init(&mgr->lock);
+	mgr->default_page_size = default_page_size;
+	mgr->visible_size = io_size;
+	mgr->visible_avail = io_size;
+
+	ttm_resource_manager_init(man, &xe->ttm, size);
+	err = drm_buddy_init(&mgr->mm, man->size, default_page_size);
+	if (err)
+		return err;
+
+	ttm_set_driver_manager(&xe->ttm, mem_type, &mgr->manager);
+	ttm_resource_manager_set_used(&mgr->manager, true);
+
+	return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr);
+}
+
+int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_mem_region *vram = &tile->mem.vram;
+
+	mgr->vram = vram;
+	return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id,
+				      vram->usable_size, vram->io_size,
+				      PAGE_SIZE);
+}
+
+int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
+			      struct ttm_resource *res,
+			      u64 offset, u64 length,
+			      struct device *dev,
+			      enum dma_data_direction dir,
+			      struct sg_table **sgt)
+{
+	struct xe_tile *tile = &xe->tiles[res->mem_type - XE_PL_VRAM0];
+	struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res);
+	struct xe_res_cursor cursor;
+	struct scatterlist *sg;
+	int num_entries = 0;
+	int i, r;
+
+	if (vres->used_visible_size < res->size)
+		return -EOPNOTSUPP;
+
+	*sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
+	if (!*sgt)
+		return -ENOMEM;
+
+	/* Determine the number of DRM_BUDDY blocks to export */
+	xe_res_first(res, offset, length, &cursor);
+	while (cursor.remaining) {
+		num_entries++;
+		xe_res_next(&cursor, cursor.size);
+	}
+
+	r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
+	if (r)
+		goto error_free;
+
+	/* Initialize scatterlist nodes of sg_table */
+	for_each_sgtable_sg((*sgt), sg, i)
+		sg->length = 0;
+
+	/*
+	 * Walk down DRM_BUDDY blocks to populate scatterlist nodes
+	 * @note: Use iterator api to get first the DRM_BUDDY block
+	 * and the number of bytes from it. Access the following
+	 * DRM_BUDDY block(s) if more buffer needs to exported
+	 */
+	xe_res_first(res, offset, length, &cursor);
+	for_each_sgtable_sg((*sgt), sg, i) {
+		phys_addr_t phys = cursor.start + tile->mem.vram.io_start;
+		size_t size = cursor.size;
+		dma_addr_t addr;
+
+		addr = dma_map_resource(dev, phys, size, dir,
+					DMA_ATTR_SKIP_CPU_SYNC);
+		r = dma_mapping_error(dev, addr);
+		if (r)
+			goto error_unmap;
+
+		sg_set_page(sg, NULL, size, 0);
+		sg_dma_address(sg) = addr;
+		sg_dma_len(sg) = size;
+
+		xe_res_next(&cursor, cursor.size);
+	}
+
+	return 0;
+
+error_unmap:
+	for_each_sgtable_sg((*sgt), sg, i) {
+		if (!sg->length)
+			continue;
+
+		dma_unmap_resource(dev, sg->dma_address,
+				   sg->length, dir,
+				   DMA_ATTR_SKIP_CPU_SYNC);
+	}
+	sg_free_table(*sgt);
+
+error_free:
+	kfree(*sgt);
+	return r;
+}
+
+void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
+			      struct sg_table *sgt)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sgtable_sg(sgt, sg, i)
+		dma_unmap_resource(dev, sg->dma_address,
+				   sg->length, dir,
+				   DMA_ATTR_SKIP_CPU_SYNC);
+	sg_free_table(sgt);
+	kfree(sgt);
+}
+
+u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man)
+{
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+
+	return mgr->visible_size;
+}
+
+void xe_ttm_vram_get_used(struct ttm_resource_manager *man,
+			  u64 *used, u64 *used_visible)
+{
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+
+	mutex_lock(&mgr->lock);
+	*used = mgr->mm.size - mgr->mm.avail;
+	*used_visible = mgr->visible_size - mgr->visible_avail;
+	mutex_unlock(&mgr->lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
new file mode 100644
index 000000000000..d184e19a9230
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_VRAM_MGR_H_
+#define _XE_TTM_VRAM_MGR_H_
+
+#include "xe_ttm_vram_mgr_types.h"
+
+enum dma_data_direction;
+struct xe_device;
+struct xe_tile;
+
+int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
+			   u32 mem_type, u64 size, u64 io_size,
+			   u64 default_page_size);
+int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr);
+int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
+			      struct ttm_resource *res,
+			      u64 offset, u64 length,
+			      struct device *dev,
+			      enum dma_data_direction dir,
+			      struct sg_table **sgt);
+void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
+			      struct sg_table *sgt);
+
+u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man);
+void xe_ttm_vram_get_used(struct ttm_resource_manager *man,
+			  u64 *used, u64 *used_visible);
+
+static inline struct xe_ttm_vram_mgr_resource *
+to_xe_ttm_vram_mgr_resource(struct ttm_resource *res)
+{
+	return container_of(res, struct xe_ttm_vram_mgr_resource, base);
+}
+
+static inline struct xe_ttm_vram_mgr *
+to_xe_ttm_vram_mgr(struct ttm_resource_manager *man)
+{
+	return container_of(man, struct xe_ttm_vram_mgr, manager);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
new file mode 100644
index 000000000000..2d75cf126289
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_VRAM_MGR_TYPES_H_
+#define _XE_TTM_VRAM_MGR_TYPES_H_
+
+#include <drm/drm_buddy.h>
+#include <drm/ttm/ttm_device.h>
+
+struct xe_mem_region;
+
+/**
+ * struct xe_ttm_vram_mgr - XE TTM VRAM manager
+ *
+ * Manages placement of TTM resource in VRAM.
+ */
+struct xe_ttm_vram_mgr {
+	/** @manager: Base TTM resource manager */
+	struct ttm_resource_manager manager;
+	/** @mm: DRM buddy allocator which manages the VRAM */
+	struct drm_buddy mm;
+	/** @vram: ptr to details of associated VRAM region */
+	struct xe_mem_region *vram;
+	/** @visible_size: Proped size of the CPU visible portion */
+	u64 visible_size;
+	/** @visible_avail: CPU visible portion still unallocated */
+	u64 visible_avail;
+	/** @default_page_size: default page size */
+	u64 default_page_size;
+	/** @lock: protects allocations of VRAM */
+	struct mutex lock;
+	/** @mem_type: The TTM memory type */
+	u32 mem_type;
+};
+
+/**
+ * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource
+ */
+struct xe_ttm_vram_mgr_resource {
+	/** @base: Base TTM resource */
+	struct ttm_resource base;
+	/** @blocks: list of DRM buddy blocks */
+	struct list_head blocks;
+	/** @used_visible_size: How many CPU visible bytes this resource is using */
+	u64 used_visible_size;
+	/** @flags: flags associated with the resource */
+	unsigned long flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
new file mode 100644
index 000000000000..53ccd338fd8c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_tuning.h"
+
+#include <kunit/visibility.h>
+
+#include "regs/xe_gt_regs.h"
+#include "xe_gt_types.h"
+#include "xe_platform_types.h"
+#include "xe_rtp.h"
+
+#undef XE_REG_MCR
+#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
+
+static const struct xe_rtp_entry_sr gt_tunings[] = {
+	{ XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS))
+	},
+	{ XE_RTP_NAME("Tuning: 32B Access Enable"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS))
+	},
+
+	/* Xe2 */
+
+	{ XE_RTP_NAME("Tuning: L3 cache"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004)),
+	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+	},
+	{ XE_RTP_NAME("Tuning: L3 cache - media"),
+	  XE_RTP_RULES(MEDIA_VERSION(2000)),
+	  XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+	},
+
+	{}
+};
+
+static const struct xe_rtp_entry_sr engine_tunings[] = {
+	{ XE_RTP_NAME("Tuning: Set Indirect State Override"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1271),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
+	},
+	{}
+};
+
+static const struct xe_rtp_entry_sr lrc_tunings[] = {
+	{ XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  /* read verification is ignored due to 1608008084. */
+	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(FF_MODE2,
+						FF_MODE2_GS_TIMER_MASK,
+						FF_MODE2_GS_TIMER_224))
+	},
+
+	/* DG2 */
+
+	{ XE_RTP_NAME("Tuning: L3 cache"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+	},
+	{ XE_RTP_NAME("Tuning: TDS gang timer"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  /* read verification is ignored as in i915 - need to check enabling */
+	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2,
+						FF_MODE2_TDS_TIMER_MASK,
+						FF_MODE2_TDS_TIMER_128))
+	},
+	{ XE_RTP_NAME("Tuning: TBIMR fast clip"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
+	},
+
+	/* Xe_LPG */
+
+	{ XE_RTP_NAME("Tuning: L3 cache"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+	},
+
+	{}
+};
+
+void xe_tuning_process_gt(struct xe_gt *gt)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+
+	xe_rtp_process_to_sr(&ctx, gt_tunings, &gt->reg_sr);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);
+
+void xe_tuning_process_engine(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process_to_sr(&ctx, engine_tunings, &hwe->reg_sr);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine);
+
+/**
+ * xe_tuning_process_lrc - process lrc tunings
+ * @hwe: engine instance to process tunings for
+ *
+ * Process LRC table for this platform, saving in @hwe all the tunings that need
+ * to be applied on context restore. These are tunings touching registers that
+ * are part of the HW context image.
+ */
+void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process_to_sr(&ctx, lrc_tunings, &hwe->reg_lrc);
+}
diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h
new file mode 100644
index 000000000000..4f9c3ac3b516
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tuning.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TUNING_
+#define _XE_TUNING_
+
+struct xe_gt;
+struct xe_hw_engine;
+
+void xe_tuning_process_gt(struct xe_gt *gt);
+void xe_tuning_process_engine(struct xe_hw_engine *hwe);
+void xe_tuning_process_lrc(struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
new file mode 100644
index 000000000000..25e1ddfd2f86
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_uc.h"
+
+#include "xe_device.h"
+#include "xe_gsc.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_pc.h"
+#include "xe_guc_submit.h"
+#include "xe_huc.h"
+#include "xe_uc_fw.h"
+#include "xe_wopcm.h"
+
+static struct xe_gt *
+uc_to_gt(struct xe_uc *uc)
+{
+	return container_of(uc, struct xe_gt, uc);
+}
+
+static struct xe_device *
+uc_to_xe(struct xe_uc *uc)
+{
+	return gt_to_xe(uc_to_gt(uc));
+}
+
+/* Should be called once at driver load only */
+int xe_uc_init(struct xe_uc *uc)
+{
+	int ret;
+
+	/*
+	 * We call the GuC/HuC/GSC init functions even if GuC submission is off
+	 * to correctly move our tracking of the FW state to "disabled".
+	 */
+
+	ret = xe_guc_init(&uc->guc);
+	if (ret)
+		goto err;
+
+	ret = xe_huc_init(&uc->huc);
+	if (ret)
+		goto err;
+
+	ret = xe_gsc_init(&uc->gsc);
+	if (ret)
+		goto err;
+
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	ret = xe_wopcm_init(&uc->wopcm);
+	if (ret)
+		goto err;
+
+	ret = xe_guc_submit_init(&uc->guc);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err:
+	return ret;
+}
+
+/**
+ * xe_uc_init_post_hwconfig - init Uc post hwconfig load
+ * @uc: The UC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_uc_init_post_hwconfig(struct xe_uc *uc)
+{
+	int err;
+
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	err = xe_uc_sanitize_reset(uc);
+	if (err)
+		return err;
+
+	err = xe_guc_init_post_hwconfig(&uc->guc);
+	if (err)
+		return err;
+
+	return xe_gsc_init_post_hwconfig(&uc->gsc);
+}
+
+static int uc_reset(struct xe_uc *uc)
+{
+	struct xe_device *xe = uc_to_xe(uc);
+	int ret;
+
+	ret = xe_guc_reset(&uc->guc);
+	if (ret) {
+		drm_err(&xe->drm, "Failed to reset GuC, ret = %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void xe_uc_sanitize(struct xe_uc *uc)
+{
+	xe_huc_sanitize(&uc->huc);
+	xe_guc_sanitize(&uc->guc);
+}
+
+int xe_uc_sanitize_reset(struct xe_uc *uc)
+{
+	xe_uc_sanitize(uc);
+
+	return uc_reset(uc);
+}
+
+/**
+ * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig
+ * @uc: The UC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_uc_init_hwconfig(struct xe_uc *uc)
+{
+	int ret;
+
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	ret = xe_guc_min_load_for_hwconfig(&uc->guc);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * Should be called during driver load, after every GT reset, and after every
+ * suspend to reload / auth the firmwares.
+ */
+int xe_uc_init_hw(struct xe_uc *uc)
+{
+	int ret;
+
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	ret = xe_huc_upload(&uc->huc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_upload(&uc->guc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_enable_communication(&uc->guc);
+	if (ret)
+		return ret;
+
+	ret = xe_gt_record_default_lrcs(uc_to_gt(uc));
+	if (ret)
+		return ret;
+
+	ret = xe_guc_post_load_init(&uc->guc);
+	if (ret)
+		return ret;
+
+	ret = xe_guc_pc_start(&uc->guc.pc);
+	if (ret)
+		return ret;
+
+	/* We don't fail the driver load if HuC fails to auth, but let's warn */
+	ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC);
+	xe_gt_assert(uc_to_gt(uc), !ret);
+
+	/* GSC load is async */
+	xe_gsc_load_start(&uc->gsc);
+
+	return 0;
+}
+
+int xe_uc_fini_hw(struct xe_uc *uc)
+{
+	return xe_uc_sanitize_reset(uc);
+}
+
+int xe_uc_reset_prepare(struct xe_uc *uc)
+{
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	return xe_guc_reset_prepare(&uc->guc);
+}
+
+void xe_uc_gucrc_disable(struct xe_uc *uc)
+{
+	XE_WARN_ON(xe_guc_pc_gucrc_disable(&uc->guc.pc));
+}
+
+void xe_uc_stop_prepare(struct xe_uc *uc)
+{
+	xe_gsc_wait_for_worker_completion(&uc->gsc);
+	xe_guc_stop_prepare(&uc->guc);
+}
+
+int xe_uc_stop(struct xe_uc *uc)
+{
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	return xe_guc_stop(&uc->guc);
+}
+
+int xe_uc_start(struct xe_uc *uc)
+{
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	return xe_guc_start(&uc->guc);
+}
+
+static void uc_reset_wait(struct xe_uc *uc)
+{
+	int ret;
+
+again:
+	xe_guc_reset_wait(&uc->guc);
+
+	ret = xe_uc_reset_prepare(uc);
+	if (ret)
+		goto again;
+}
+
+int xe_uc_suspend(struct xe_uc *uc)
+{
+	int ret;
+
+	/* GuC submission not enabled, nothing to do */
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return 0;
+
+	uc_reset_wait(uc);
+
+	ret = xe_uc_stop(uc);
+	if (ret)
+		return ret;
+
+	return xe_guc_suspend(&uc->guc);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
new file mode 100644
index 000000000000..5d5110c0c834
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_H_
+#define _XE_UC_H_
+
+#include "xe_uc_types.h"
+
+int xe_uc_init(struct xe_uc *uc);
+int xe_uc_init_hwconfig(struct xe_uc *uc);
+int xe_uc_init_post_hwconfig(struct xe_uc *uc);
+int xe_uc_init_hw(struct xe_uc *uc);
+int xe_uc_fini_hw(struct xe_uc *uc);
+void xe_uc_gucrc_disable(struct xe_uc *uc);
+int xe_uc_reset_prepare(struct xe_uc *uc);
+void xe_uc_stop_prepare(struct xe_uc *uc);
+int xe_uc_stop(struct xe_uc *uc);
+int xe_uc_start(struct xe_uc *uc);
+int xe_uc_suspend(struct xe_uc *uc);
+int xe_uc_sanitize_reset(struct xe_uc *uc);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.c b/drivers/gpu/drm/xe/xe_uc_debugfs.c
new file mode 100644
index 000000000000..0a39ec5a6e99
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_debugfs.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+
+#include "xe_gt.h"
+#include "xe_guc_debugfs.h"
+#include "xe_huc_debugfs.h"
+#include "xe_macros.h"
+#include "xe_uc_debugfs.h"
+
+void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent)
+{
+	struct dentry *root;
+
+	root = debugfs_create_dir("uc", parent);
+	if (IS_ERR(root)) {
+		XE_WARN_ON("Create UC directory failed");
+		return;
+	}
+
+	xe_guc_debugfs_register(&uc->guc, root);
+	xe_huc_debugfs_register(&uc->huc, root);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.h b/drivers/gpu/drm/xe/xe_uc_debugfs.h
new file mode 100644
index 000000000000..a13382df2bd7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_DEBUGFS_H_
+#define _XE_UC_DEBUGFS_H_
+
+struct dentry;
+struct xe_uc;
+
+void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
new file mode 100644
index 000000000000..9dff96dfe455
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -0,0 +1,882 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/firmware.h>
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_guc_regs.h"
+#include "xe_bo.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gsc.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_module.h"
+#include "xe_uc_fw.h"
+
+/*
+ * List of required GuC and HuC binaries per-platform. They must be ordered
+ * based on platform, from newer to older.
+ *
+ * Versioning follows the guidelines from
+ * Documentation/driver-api/firmware/firmware-usage-guidelines.rst. There is a
+ * distinction for platforms being officially supported by the driver or not.
+ * Platforms not available publicly or not yet officially supported by the
+ * driver (under force-probe), use the mmp_ver(): the firmware autoselect logic
+ * will select the firmware from disk with filename that matches the full
+ * "mpp version", i.e. major.minor.patch. mmp_ver() should only be used for
+ * this case.
+ *
+ * For platforms officially supported by the driver, the filename always only
+ * ever contains the major version (GuC) or no version at all (HuC).
+ *
+ * After loading the file, the driver parses the versions embedded in the blob.
+ * The major version needs to match a major version supported by the driver (if
+ * any). The minor version is also checked and a notice emitted to the log if
+ * the version found is smaller than the version wanted. This is done only for
+ * informational purposes so users may have a chance to upgrade, but the driver
+ * still loads and use the older firmware.
+ *
+ * Examples:
+ *
+ *	1) Platform officially supported by i915 - using Tigerlake as example.
+ *	   Driver loads the following firmware blobs from disk:
+ *
+ *		- i915/tgl_guc_<major>.bin
+ *		- i915/tgl_huc.bin
+ *
+ *	   <major> number for GuC is checked that it matches the version inside
+ *	   the blob. <minor> version is checked and if smaller than the expected
+ *	   an info message is emitted about that.
+ *
+ *	1) XE_<FUTUREINTELPLATFORM>, still under require_force_probe. Using
+ *	   "wipplat" as a short-name. Driver loads the following firmware blobs
+ *	   from disk:
+ *
+ *		- xe/wipplat_guc_<major>.<minor>.<patch>.bin
+ *		- xe/wipplat_huc_<major>.<minor>.<patch>.bin
+ *
+ *	   <major> and <minor> are checked that they match the version inside
+ *	   the blob. Both of them need to match exactly what the driver is
+ *	   expecting, otherwise it fails.
+ *
+ *	3) Platform officially supported by xe and out of force-probe. Using
+ *	   "plat" as a short-name. Except for the different directory, the
+ *	   behavior is the same as (1). Driver loads the following firmware
+ *	   blobs from disk:
+ *
+ *		- xe/plat_guc_<major>.bin
+ *		- xe/plat_huc.bin
+ *
+ *	   <major> number for GuC is checked that it matches the version inside
+ *	   the blob. <minor> version is checked and if smaller than the expected
+ *	   an info message is emitted about that.
+ *
+ * For the platforms already released with a major version, they should never be
+ * removed from the table. Instead new entries with newer versions may be added
+ * before them, so they take precedence.
+ *
+ * TODO: Currently there's no fallback on major version. That's because xe
+ * driver only supports the one major version of each firmware in the table.
+ * This needs to be fixed when the major version of GuC is updated.
+ */
+
+struct uc_fw_entry {
+	enum xe_platform platform;
+	struct {
+		const char *path;
+		u16 major;
+		u16 minor;
+		bool full_ver_required;
+	};
+};
+
+struct fw_blobs_by_type {
+	const struct uc_fw_entry *entries;
+	u32 count;
+};
+
+#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver)			\
+	fw_def(METEORLAKE,	major_ver(i915,	guc,	mtl,	70, 7))		\
+	fw_def(DG2,		major_ver(i915,	guc,	dg2,	70, 5))		\
+	fw_def(DG1,		major_ver(i915,	guc,	dg1,	70, 5))		\
+	fw_def(ALDERLAKE_N,	major_ver(i915,	guc,	tgl,	70, 5))		\
+	fw_def(ALDERLAKE_P,	major_ver(i915,	guc,	adlp,	70, 5))		\
+	fw_def(ALDERLAKE_S,	major_ver(i915,	guc,	tgl,	70, 5))		\
+	fw_def(ROCKETLAKE,	major_ver(i915,	guc,	tgl,	70, 5))		\
+	fw_def(TIGERLAKE,	major_ver(i915,	guc,	tgl,	70, 5))
+
+#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver)		\
+	fw_def(METEORLAKE,	no_ver(i915,	huc_gsc,	mtl))		\
+	fw_def(DG1,		no_ver(i915,	huc,		dg1))		\
+	fw_def(ALDERLAKE_P,	no_ver(i915,	huc,		tgl))		\
+	fw_def(ALDERLAKE_S,	no_ver(i915,	huc,		tgl))		\
+	fw_def(ROCKETLAKE,	no_ver(i915,	huc,		tgl))		\
+	fw_def(TIGERLAKE,	no_ver(i915,	huc,		tgl))
+
+/* for the GSC FW we match the compatibility version and not the release one */
+#define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver)		\
+	fw_def(METEORLAKE,	major_ver(i915,	gsc,	mtl,	1, 0))
+
+#define MAKE_FW_PATH(dir__, uc__, shortname__, version__)			\
+	__stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin"
+
+#define fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c)			\
+	MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a ## . ## b ## . ## c))
+#define fw_filename_major_ver(dir_, uc_, shortname_, a, b)			\
+	MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a))
+#define fw_filename_no_ver(dir_, uc_, shortname_)				\
+	MAKE_FW_PATH(dir_, uc_, shortname_, "")
+
+#define uc_fw_entry_mmp_ver(dir_, uc_, shortname_, a, b, c)			\
+	{ fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c),			\
+	  a, b, true }
+#define uc_fw_entry_major_ver(dir_, uc_, shortname_, a, b)			\
+	{ fw_filename_major_ver(dir_, uc_, shortname_, a, b),			\
+	  a, b }
+#define uc_fw_entry_no_ver(dir_, uc_, shortname_)				\
+	{ fw_filename_no_ver(dir_, uc_, shortname_),				\
+	  0, 0 }
+
+/* All blobs need to be declared via MODULE_FIRMWARE() */
+#define XE_UC_MODULE_FIRMWARE(platform__, fw_filename)				\
+	MODULE_FIRMWARE(fw_filename);
+
+#define XE_UC_FW_ENTRY(platform__, entry__)					\
+	{									\
+		.platform = XE_ ## platform__,					\
+		entry__,							\
+	},
+
+XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE,
+		     fw_filename_mmp_ver, fw_filename_major_ver)
+XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE,
+		     fw_filename_mmp_ver, fw_filename_no_ver)
+XE_GSC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, fw_filename_major_ver)
+
+static struct xe_gt *
+__uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type)
+{
+	XE_WARN_ON(type >= XE_UC_FW_NUM_TYPES);
+
+	switch (type) {
+	case XE_UC_FW_TYPE_GUC:
+		return container_of(uc_fw, struct xe_gt, uc.guc.fw);
+	case XE_UC_FW_TYPE_HUC:
+		return container_of(uc_fw, struct xe_gt, uc.huc.fw);
+	case XE_UC_FW_TYPE_GSC:
+		return container_of(uc_fw, struct xe_gt, uc.gsc.fw);
+	default:
+		return NULL;
+	}
+}
+
+static struct xe_gt *uc_fw_to_gt(struct xe_uc_fw *uc_fw)
+{
+	return __uc_fw_to_gt(uc_fw, uc_fw->type);
+}
+
+static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
+{
+	return gt_to_xe(uc_fw_to_gt(uc_fw));
+}
+
+static void
+uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
+{
+	static const struct uc_fw_entry entries_guc[] = {
+		XE_GUC_FIRMWARE_DEFS(XE_UC_FW_ENTRY,
+				     uc_fw_entry_mmp_ver,
+				     uc_fw_entry_major_ver)
+	};
+	static const struct uc_fw_entry entries_huc[] = {
+		XE_HUC_FIRMWARE_DEFS(XE_UC_FW_ENTRY,
+				     uc_fw_entry_mmp_ver,
+				     uc_fw_entry_no_ver)
+	};
+	static const struct uc_fw_entry entries_gsc[] = {
+		XE_GSC_FIRMWARE_DEFS(XE_UC_FW_ENTRY, uc_fw_entry_major_ver)
+	};
+	static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = {
+		[XE_UC_FW_TYPE_GUC] = { entries_guc, ARRAY_SIZE(entries_guc) },
+		[XE_UC_FW_TYPE_HUC] = { entries_huc, ARRAY_SIZE(entries_huc) },
+		[XE_UC_FW_TYPE_GSC] = { entries_gsc, ARRAY_SIZE(entries_gsc) },
+	};
+	static const struct uc_fw_entry *entries;
+	enum xe_platform p = xe->info.platform;
+	u32 count;
+	int i;
+
+	xe_assert(xe, uc_fw->type < ARRAY_SIZE(blobs_all));
+	entries = blobs_all[uc_fw->type].entries;
+	count = blobs_all[uc_fw->type].count;
+
+	for (i = 0; i < count && p <= entries[i].platform; i++) {
+		if (p == entries[i].platform) {
+			uc_fw->path = entries[i].path;
+			uc_fw->versions.wanted.major = entries[i].major;
+			uc_fw->versions.wanted.minor = entries[i].minor;
+			uc_fw->full_ver_required = entries[i].full_ver_required;
+
+			if (uc_fw->type == XE_UC_FW_TYPE_GSC)
+				uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY;
+			else
+				uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE;
+
+			break;
+		}
+	}
+}
+
+static void
+uc_fw_override(struct xe_uc_fw *uc_fw)
+{
+	char *path_override = NULL;
+
+	/* empty string disables, but it's not allowed for GuC */
+	switch (uc_fw->type) {
+	case XE_UC_FW_TYPE_GUC:
+		if (xe_modparam.guc_firmware_path && *xe_modparam.guc_firmware_path)
+			path_override = xe_modparam.guc_firmware_path;
+		break;
+	case XE_UC_FW_TYPE_HUC:
+		path_override = xe_modparam.huc_firmware_path;
+		break;
+	case XE_UC_FW_TYPE_GSC:
+		path_override = xe_modparam.gsc_firmware_path;
+		break;
+	default:
+		break;
+	}
+
+	if (path_override) {
+		uc_fw->path = path_override;
+		uc_fw->user_overridden = true;
+	}
+}
+
+/**
+ * xe_uc_fw_copy_rsa - copy fw RSA to buffer
+ *
+ * @uc_fw: uC firmware
+ * @dst: dst buffer
+ * @max_len: max number of bytes to copy
+ *
+ * Return: number of copied bytes.
+ */
+size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	u32 size = min_t(u32, uc_fw->rsa_size, max_len);
+
+	xe_assert(xe, !(size % 4));
+	xe_assert(xe, xe_uc_fw_is_available(uc_fw));
+
+	xe_map_memcpy_from(xe, dst, &uc_fw->bo->vmap,
+			   xe_uc_fw_rsa_offset(uc_fw), size);
+
+	return size;
+}
+
+static void uc_fw_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_uc_fw *uc_fw = arg;
+
+	if (!xe_uc_fw_is_available(uc_fw))
+		return;
+
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED);
+}
+
+static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
+{
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
+	struct xe_uc_fw_version *compatibility = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
+
+	xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC);
+	xe_gt_assert(gt, release->major >= 70);
+
+	if (release->major > 70 || release->minor >= 6) {
+		/* v70.6.0 adds CSS header support */
+		compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
+						 css->submission_version);
+		compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
+						 css->submission_version);
+		compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH,
+						 css->submission_version);
+	} else if (release->minor >= 3) {
+		/* v70.3.0 introduced v1.1.0 */
+		compatibility->major = 1;
+		compatibility->minor = 1;
+		compatibility->patch = 0;
+	} else {
+		/* v70.0.0 introduced v1.0.0 */
+		compatibility->major = 1;
+		compatibility->minor = 0;
+		compatibility->patch = 0;
+	}
+
+	uc_fw->private_data_size = css->private_data_size;
+}
+
+int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted;
+	struct xe_uc_fw_version *found = &uc_fw->versions.found[uc_fw->versions.wanted_type];
+
+	/* Driver has no requirement on any version, any is good. */
+	if (!wanted->major)
+		return 0;
+
+	/*
+	 * If full version is required, both major and minor should match.
+	 * Otherwise, at least the major version.
+	 */
+	if (wanted->major != found->major ||
+	    (uc_fw->full_ver_required && wanted->minor != found->minor)) {
+		drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			   found->major, found->minor,
+			   wanted->major, wanted->minor);
+		goto fail;
+	}
+
+	if (wanted->minor > found->minor) {
+		drm_notice(&xe->drm, "%s firmware (%u.%u) is recommended, but only (%u.%u) was found in %s\n",
+			   xe_uc_fw_type_repr(uc_fw->type),
+			   wanted->major, wanted->minor,
+			   found->major, found->minor,
+			   uc_fw->path);
+		drm_info(&xe->drm, "Consider updating your linux-firmware pkg or downloading from %s\n",
+			 XE_UC_FIRMWARE_URL);
+	}
+
+	return 0;
+
+fail:
+	if (xe_uc_fw_is_overridden(uc_fw))
+		return 0;
+
+	return -ENOEXEC;
+}
+
+/* Refer to the "CSS-based Firmware Layout" documentation entry for details */
+static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t fw_size)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
+	struct uc_css_header *css;
+	size_t size;
+
+	/* Check the size of the blob before examining buffer contents */
+	if (unlikely(fw_size < sizeof(struct uc_css_header))) {
+		drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
+			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			 fw_size, sizeof(struct uc_css_header));
+		return -ENODATA;
+	}
+
+	css = (struct uc_css_header *)fw_data;
+
+	/* Check integrity of size values inside CSS header */
+	size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
+		css->exponent_size_dw) * sizeof(u32);
+	if (unlikely(size != sizeof(struct uc_css_header))) {
+		drm_warn(&xe->drm,
+			 "%s firmware %s: unexpected header size: %zu != %zu\n",
+			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			 fw_size, sizeof(struct uc_css_header));
+		return -EPROTO;
+	}
+
+	/* uCode size must calculated from other sizes */
+	uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
+
+	/* now RSA */
+	uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+
+	/* At least, it should have header, uCode and RSA. Size of all three. */
+	size = sizeof(struct uc_css_header) + uc_fw->ucode_size +
+		uc_fw->rsa_size;
+	if (unlikely(fw_size < size)) {
+		drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
+			 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			 fw_size, size);
+		return -ENOEXEC;
+	}
+
+	/* Get version numbers from the CSS header */
+	release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version);
+	release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version);
+	release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version);
+
+	if (uc_fw->type == XE_UC_FW_TYPE_GUC)
+		guc_read_css_info(uc_fw, css);
+
+	return 0;
+}
+
+static bool is_cpd_header(const void *data)
+{
+	const u32 *marker = data;
+
+	return *marker == GSC_CPD_HEADER_MARKER;
+}
+
+static u32 entry_offset(const struct gsc_cpd_header_v2 *header, const char *name)
+{
+	const struct gsc_cpd_entry *entry;
+	int i;
+
+	entry = (void *)header + header->header_length;
+
+	for (i = 0; i < header->num_of_entries; i++, entry++)
+		if (strcmp(entry->name, name) == 0)
+			return entry->offset & GSC_CPD_ENTRY_OFFSET_MASK;
+
+	return 0;
+}
+
+/* Refer to the "GSC-based Firmware Layout" documentation entry for details */
+static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t size,
+			    const char *manifest_entry, const char *css_entry)
+{
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct xe_device *xe = gt_to_xe(gt);
+	const struct gsc_cpd_header_v2 *header = data;
+	struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
+	const struct gsc_manifest_header *manifest;
+	size_t min_size = sizeof(*header);
+	u32 offset;
+
+	/* manifest_entry is mandatory, css_entry is optional */
+	xe_assert(xe, manifest_entry);
+
+	if (size < min_size || !is_cpd_header(header))
+		return -ENOENT;
+
+	if (header->header_length < sizeof(struct gsc_cpd_header_v2)) {
+		xe_gt_err(gt, "invalid CPD header length %u!\n", header->header_length);
+		return -EINVAL;
+	}
+
+	min_size = header->header_length + sizeof(struct gsc_cpd_entry) * header->num_of_entries;
+	if (size < min_size) {
+		xe_gt_err(gt, "FW too small! %zu < %zu\n", size, min_size);
+		return -ENODATA;
+	}
+
+	/* Look for the manifest first */
+	offset = entry_offset(header, manifest_entry);
+	if (!offset) {
+		xe_gt_err(gt, "Failed to find %s manifest!\n",
+			  xe_uc_fw_type_repr(uc_fw->type));
+		return -ENODATA;
+	}
+
+	min_size = offset + sizeof(struct gsc_manifest_header);
+	if (size < min_size) {
+		xe_gt_err(gt, "FW too small! %zu < %zu\n", size, min_size);
+		return -ENODATA;
+	}
+
+	manifest = data + offset;
+
+	release->major = manifest->fw_version.major;
+	release->minor = manifest->fw_version.minor;
+	release->patch = manifest->fw_version.hotfix;
+
+	if (uc_fw->type == XE_UC_FW_TYPE_GSC) {
+		struct xe_gsc *gsc = container_of(uc_fw, struct xe_gsc, fw);
+
+		release->build = manifest->fw_version.build;
+		gsc->security_version = manifest->security_version;
+	}
+
+	/* then optionally look for the css header */
+	if (css_entry) {
+		int ret;
+
+		/*
+		 * This section does not contain a CSS entry on DG2. We
+		 * don't support DG2 HuC right now, so no need to handle
+		 * it, just add a reminder in case that changes.
+		 */
+		xe_assert(xe, xe->info.platform != XE_DG2);
+
+		offset = entry_offset(header, css_entry);
+
+		/* the CSS header parser will check that the CSS header fits */
+		if (offset > size) {
+			xe_gt_err(gt, "FW too small! %zu < %u\n", size, offset);
+			return -ENODATA;
+		}
+
+		ret = parse_css_header(uc_fw, data + offset, size - offset);
+		if (ret)
+			return ret;
+
+		uc_fw->css_offset = offset;
+	}
+
+	uc_fw->has_gsc_headers = true;
+
+	return 0;
+}
+
+static int parse_gsc_layout(struct xe_uc_fw *uc_fw, const void *data, size_t size)
+{
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	const struct gsc_layout_pointers *layout = data;
+	const struct gsc_bpdt_header *bpdt_header = NULL;
+	const struct gsc_bpdt_entry *bpdt_entry = NULL;
+	size_t min_size = sizeof(*layout);
+	int i;
+
+	if (size < min_size) {
+		xe_gt_err(gt, "GSC FW too small! %zu < %zu\n", size, min_size);
+		return -ENODATA;
+	}
+
+	min_size = layout->boot1.offset + layout->boot1.size;
+	if (size < min_size) {
+		xe_gt_err(gt, "GSC FW too small for boot section! %zu < %zu\n",
+			  size, min_size);
+		return -ENODATA;
+	}
+
+	min_size = sizeof(*bpdt_header);
+	if (layout->boot1.size < min_size) {
+		xe_gt_err(gt, "GSC FW boot section too small for BPDT header: %u < %zu\n",
+			  layout->boot1.size, min_size);
+		return -ENODATA;
+	}
+
+	bpdt_header = data + layout->boot1.offset;
+	if (bpdt_header->signature != GSC_BPDT_HEADER_SIGNATURE) {
+		xe_gt_err(gt, "invalid signature for BPDT header: 0x%08x!\n",
+			  bpdt_header->signature);
+		return -EINVAL;
+	}
+
+	min_size += sizeof(*bpdt_entry) * bpdt_header->descriptor_count;
+	if (layout->boot1.size < min_size) {
+		xe_gt_err(gt, "GSC FW boot section too small for BPDT entries: %u < %zu\n",
+			  layout->boot1.size, min_size);
+		return -ENODATA;
+	}
+
+	bpdt_entry = (void *)bpdt_header + sizeof(*bpdt_header);
+	for (i = 0; i < bpdt_header->descriptor_count; i++, bpdt_entry++) {
+		if ((bpdt_entry->type & GSC_BPDT_ENTRY_TYPE_MASK) !=
+		    GSC_BPDT_ENTRY_TYPE_GSC_RBE)
+			continue;
+
+		min_size = bpdt_entry->sub_partition_offset;
+
+		/* the CPD header parser will check that the CPD header fits */
+		if (layout->boot1.size < min_size) {
+			xe_gt_err(gt, "GSC FW boot section too small for CPD offset: %u < %zu\n",
+				  layout->boot1.size, min_size);
+			return -ENODATA;
+		}
+
+		return parse_cpd_header(uc_fw,
+					(void *)bpdt_header + min_size,
+					layout->boot1.size - min_size,
+					"RBEP.man", NULL);
+	}
+
+	xe_gt_err(gt, "couldn't find CPD header in GSC binary!\n");
+	return -ENODATA;
+}
+
+static int parse_headers(struct xe_uc_fw *uc_fw, const struct firmware *fw)
+{
+	int ret;
+
+	/*
+	 * All GuC releases and older HuC ones use CSS headers, while newer HuC
+	 * releases use GSC CPD headers.
+	 */
+	switch (uc_fw->type) {
+	case XE_UC_FW_TYPE_GSC:
+		return parse_gsc_layout(uc_fw, fw->data, fw->size);
+	case XE_UC_FW_TYPE_HUC:
+		ret = parse_cpd_header(uc_fw, fw->data, fw->size, "HUCP.man", "huc_fw");
+		if (!ret || ret != -ENOENT)
+			return ret;
+		fallthrough;
+	case XE_UC_FW_TYPE_GUC:
+		return parse_css_header(uc_fw, fw->data, fw->size);
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+#define print_uc_fw_version(p_, version_, prefix_, ...) \
+do { \
+	struct xe_uc_fw_version *ver_ = (version_); \
+	if (ver_->build) \
+		drm_printf(p_, prefix_ " version %u.%u.%u.%u\n", ##__VA_ARGS__, \
+			   ver_->major, ver_->minor, \
+			   ver_->patch, ver_->build); \
+	else \
+		drm_printf(p_, prefix_ " version %u.%u.%u\n", ##__VA_ARGS__, \
+			  ver_->major, ver_->minor, ver_->patch); \
+} while (0)
+
+static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmware_p)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct device *dev = xe->drm.dev;
+	struct drm_printer p = drm_info_printer(dev);
+	const struct firmware *fw = NULL;
+	int err;
+
+	/*
+	 * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
+	 * before we're looked at the HW caps to see if we have uc support
+	 */
+	BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED);
+	xe_assert(xe, !uc_fw->status);
+	xe_assert(xe, !uc_fw->path);
+
+	uc_fw_auto_select(xe, uc_fw);
+	xe_uc_fw_change_status(uc_fw, uc_fw->path ?
+			       XE_UC_FIRMWARE_SELECTED :
+			       XE_UC_FIRMWARE_NOT_SUPPORTED);
+
+	if (!xe_uc_fw_is_supported(uc_fw))
+		return 0;
+
+	uc_fw_override(uc_fw);
+
+	/* an empty path means the firmware is disabled */
+	if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) {
+		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED);
+		drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type));
+		return 0;
+	}
+
+	err = request_firmware(&fw, uc_fw->path, dev);
+	if (err)
+		goto fail;
+
+	err = parse_headers(uc_fw, fw);
+	if (err)
+		goto fail;
+
+	print_uc_fw_version(&p,
+			    &uc_fw->versions.found[XE_UC_FW_VER_RELEASE],
+			    "Using %s firmware from %s",
+			    xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+
+	/* for GSC FW we want the compatibility version, which we query after load */
+	if (uc_fw->type != XE_UC_FW_TYPE_GSC) {
+		err = xe_uc_fw_check_version_requirements(uc_fw);
+		if (err)
+			goto fail;
+	}
+
+	*firmware_p = fw;
+
+	return 0;
+
+fail:
+	xe_uc_fw_change_status(uc_fw, err == -ENOENT ?
+			       XE_UC_FIRMWARE_MISSING :
+			       XE_UC_FIRMWARE_ERROR);
+
+	drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n",
+		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+	drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n",
+		 xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL);
+
+	release_firmware(fw);		/* OK even if fw is NULL */
+
+	return err;
+}
+
+static void uc_fw_release(const struct firmware *fw)
+{
+	release_firmware(fw);
+}
+
+static int uc_fw_copy(struct xe_uc_fw *uc_fw, const void *data, size_t size, u32 flags)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_bo *obj;
+	int err;
+
+	obj = xe_managed_bo_create_from_data(xe, tile, data, size, flags);
+	if (IS_ERR(obj)) {
+		drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo",
+			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+		err = PTR_ERR(obj);
+		goto fail;
+	}
+
+	uc_fw->bo = obj;
+	uc_fw->size = size;
+
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_AVAILABLE);
+
+	err = drmm_add_action_or_reset(&xe->drm, uc_fw_fini, uc_fw);
+	if (err)
+		goto fail;
+
+	return 0;
+
+fail:
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_ERROR);
+	drm_notice(&xe->drm, "%s firmware %s: copy failed with error %d\n",
+		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+
+	return err;
+}
+
+int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
+{
+	const struct firmware *fw = NULL;
+	int err;
+
+	err = uc_fw_request(uc_fw, &fw);
+	if (err)
+		return err;
+
+	/* no error and no firmware means nothing to copy */
+	if (!fw)
+		return 0;
+
+	err = uc_fw_copy(uc_fw, fw->data, fw->size,
+			 XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT);
+
+	uc_fw_release(fw);
+
+	return err;
+}
+
+static u32 uc_fw_ggtt_offset(struct xe_uc_fw *uc_fw)
+{
+	return xe_bo_ggtt_addr(uc_fw->bo);
+}
+
+static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+	u32 src_offset, dma_ctrl;
+	int ret;
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+	/* Set the source address for the uCode */
+	src_offset = uc_fw_ggtt_offset(uc_fw) + uc_fw->css_offset;
+	xe_mmio_write32(gt, DMA_ADDR_0_LOW, lower_32_bits(src_offset));
+	xe_mmio_write32(gt, DMA_ADDR_0_HIGH,
+			upper_32_bits(src_offset) | DMA_ADDRESS_SPACE_GGTT);
+
+	/* Set the DMA destination */
+	xe_mmio_write32(gt, DMA_ADDR_1_LOW, offset);
+	xe_mmio_write32(gt, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
+
+	/*
+	 * Set the transfer size. The header plus uCode will be copied to WOPCM
+	 * via DMA, excluding any other components
+	 */
+	xe_mmio_write32(gt, DMA_COPY_SIZE,
+			sizeof(struct uc_css_header) + uc_fw->ucode_size);
+
+	/* Start the DMA */
+	xe_mmio_write32(gt, DMA_CTRL,
+			_MASKED_BIT_ENABLE(dma_flags | START_DMA));
+
+	/* Wait for DMA to finish */
+	ret = xe_mmio_wait32(gt, DMA_CTRL, START_DMA, 0, 100000, &dma_ctrl,
+			     false);
+	if (ret)
+		drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
+			xe_uc_fw_type_repr(uc_fw->type), dma_ctrl);
+
+	/* Disable the bits once DMA is over */
+	xe_mmio_write32(gt, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags));
+
+	return ret;
+}
+
+int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
+{
+	struct xe_device *xe = uc_fw_to_xe(uc_fw);
+	int err;
+
+	/* make sure the status was cleared the last time we reset the uc */
+	xe_assert(xe, !xe_uc_fw_is_loaded(uc_fw));
+
+	if (!xe_uc_fw_is_loadable(uc_fw))
+		return -ENOEXEC;
+
+	/* Call custom loader */
+	err = uc_fw_xfer(uc_fw, offset, dma_flags);
+	if (err)
+		goto fail;
+
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_TRANSFERRED);
+	return 0;
+
+fail:
+	drm_err(&xe->drm, "Failed to load %s firmware %s (%d)\n",
+		xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+		err);
+	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOAD_FAIL);
+	return err;
+}
+
+static const char *version_type_repr(enum xe_uc_fw_version_types type)
+{
+	switch (type) {
+	case XE_UC_FW_VER_RELEASE:
+		return "release";
+	case XE_UC_FW_VER_COMPATIBILITY:
+		return "compatibility";
+	default:
+		return "Unknown version type";
+	}
+}
+
+void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p)
+{
+	int i;
+
+	drm_printf(p, "%s firmware: %s\n",
+		   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+	drm_printf(p, "\tstatus: %s\n",
+		   xe_uc_fw_status_repr(uc_fw->status));
+
+	print_uc_fw_version(p, &uc_fw->versions.wanted, "\twanted %s",
+			    version_type_repr(uc_fw->versions.wanted_type));
+
+	for (i = 0; i < XE_UC_FW_VER_TYPE_COUNT; i++) {
+		struct xe_uc_fw_version *ver = &uc_fw->versions.found[i];
+
+		if (ver->major)
+			print_uc_fw_version(p, ver, "\tfound %s",
+					    version_type_repr(i));
+	}
+
+	if (uc_fw->ucode_size)
+		drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
+	if (uc_fw->rsa_size)
+		drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
new file mode 100644
index 000000000000..85c20795d1f8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_H_
+#define _XE_UC_FW_H_
+
+#include <linux/errno.h>
+
+#include "xe_macros.h"
+#include "xe_uc_fw_abi.h"
+#include "xe_uc_fw_types.h"
+
+struct drm_printer;
+
+int xe_uc_fw_init(struct xe_uc_fw *uc_fw);
+size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len);
+int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags);
+int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw);
+void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p);
+
+static inline u32 xe_uc_fw_rsa_offset(struct xe_uc_fw *uc_fw)
+{
+	return sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->css_offset;
+}
+
+static inline void xe_uc_fw_change_status(struct xe_uc_fw *uc_fw,
+					  enum xe_uc_fw_status status)
+{
+	uc_fw->__status = status;
+}
+
+static inline
+const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status)
+{
+	switch (status) {
+	case XE_UC_FIRMWARE_NOT_SUPPORTED:
+		return "N/A";
+	case XE_UC_FIRMWARE_UNINITIALIZED:
+		return "UNINITIALIZED";
+	case XE_UC_FIRMWARE_DISABLED:
+		return "DISABLED";
+	case XE_UC_FIRMWARE_SELECTED:
+		return "SELECTED";
+	case XE_UC_FIRMWARE_MISSING:
+		return "MISSING";
+	case XE_UC_FIRMWARE_ERROR:
+		return "ERROR";
+	case XE_UC_FIRMWARE_AVAILABLE:
+		return "AVAILABLE";
+	case XE_UC_FIRMWARE_INIT_FAIL:
+		return "INIT FAIL";
+	case XE_UC_FIRMWARE_LOADABLE:
+		return "LOADABLE";
+	case XE_UC_FIRMWARE_LOAD_FAIL:
+		return "LOAD FAIL";
+	case XE_UC_FIRMWARE_TRANSFERRED:
+		return "TRANSFERRED";
+	case XE_UC_FIRMWARE_RUNNING:
+		return "RUNNING";
+	}
+	return "<invalid>";
+}
+
+static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status)
+{
+	switch (status) {
+	case XE_UC_FIRMWARE_NOT_SUPPORTED:
+		return -ENODEV;
+	case XE_UC_FIRMWARE_UNINITIALIZED:
+		return -EACCES;
+	case XE_UC_FIRMWARE_DISABLED:
+		return -EPERM;
+	case XE_UC_FIRMWARE_MISSING:
+		return -ENOENT;
+	case XE_UC_FIRMWARE_ERROR:
+		return -ENOEXEC;
+	case XE_UC_FIRMWARE_INIT_FAIL:
+	case XE_UC_FIRMWARE_LOAD_FAIL:
+		return -EIO;
+	case XE_UC_FIRMWARE_SELECTED:
+		return -ESTALE;
+	case XE_UC_FIRMWARE_AVAILABLE:
+	case XE_UC_FIRMWARE_LOADABLE:
+	case XE_UC_FIRMWARE_TRANSFERRED:
+	case XE_UC_FIRMWARE_RUNNING:
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type)
+{
+	switch (type) {
+	case XE_UC_FW_TYPE_GUC:
+		return "GuC";
+	case XE_UC_FW_TYPE_HUC:
+		return "HuC";
+	case XE_UC_FW_TYPE_GSC:
+		return "GSC";
+	default:
+		return "uC";
+	}
+}
+
+static inline enum xe_uc_fw_status
+__xe_uc_fw_status(struct xe_uc_fw *uc_fw)
+{
+	/* shouldn't call this before checking hw/blob availability */
+	XE_WARN_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED);
+	return uc_fw->status;
+}
+
+static inline bool xe_uc_fw_is_supported(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) != XE_UC_FIRMWARE_NOT_SUPPORTED;
+}
+
+static inline bool xe_uc_fw_is_enabled(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) > XE_UC_FIRMWARE_DISABLED;
+}
+
+static inline bool xe_uc_fw_is_disabled(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_DISABLED;
+}
+
+static inline bool xe_uc_fw_is_available(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_AVAILABLE;
+}
+
+static inline bool xe_uc_fw_is_loadable(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE;
+}
+
+static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_TRANSFERRED;
+}
+
+static inline bool xe_uc_fw_is_running(struct xe_uc_fw *uc_fw)
+{
+	return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_RUNNING;
+}
+
+static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw)
+{
+	return uc_fw->user_overridden;
+}
+
+static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw)
+{
+	if (xe_uc_fw_is_loaded(uc_fw))
+		xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOADABLE);
+}
+
+static inline u32 __xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw)
+{
+	return sizeof(struct uc_css_header) + uc_fw->ucode_size;
+}
+
+/**
+ * xe_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded.
+ * @uc_fw: uC firmware.
+ *
+ * Get the size of the firmware and header that will be uploaded to WOPCM.
+ *
+ * Return: Upload firmware size, or zero on firmware fetch failure.
+ */
+static inline u32 xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw)
+{
+	if (!xe_uc_fw_is_available(uc_fw))
+		return 0;
+
+	return __xe_uc_fw_get_upload_size(uc_fw);
+}
+
+#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git"
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
new file mode 100644
index 000000000000..87ade41209d0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_ABI_H
+#define _XE_UC_FW_ABI_H
+
+#include <linux/build_bug.h>
+#include <linux/types.h>
+
+/**
+ * DOC: CSS-based Firmware Layout
+ *
+ * The CSS-based firmware structure is used for GuC releases on all platforms
+ * and for HuC releases up to DG1. Starting from DG2/MTL the HuC uses the GSC
+ * layout instead.
+ * The CSS firmware layout looks like this::
+ *
+ *      +======================================================================+
+ *      |  Firmware blob                                                       |
+ *      +===============+===============+============+============+============+
+ *      |  CSS header   |     uCode     |  RSA key   |  modulus   |  exponent  |
+ *      +===============+===============+============+============+============+
+ *       <-header size->                 <---header size continued ----------->
+ *       <--- size ----------------------------------------------------------->
+ *                                       <-key size->
+ *                                                    <-mod size->
+ *                                                                 <-exp size->
+ *
+ * The firmware may or may not have modulus key and exponent data. The header,
+ * uCode and RSA signature are must-have components that will be used by driver.
+ * Length of each components, which is all in dwords, can be found in header.
+ * In the case that modulus and exponent are not present in fw, a.k.a truncated
+ * image, the length value still appears in header.
+ *
+ * Driver will do some basic fw size validation based on the following rules:
+ *
+ * 1. Header, uCode and RSA are must-have components.
+ * 2. All firmware components, if they present, are in the sequence illustrated
+ *    in the layout table above.
+ * 3. Length info of each component can be found in header, in dwords.
+ * 4. Modulus and exponent key are not required by driver. They may not appear
+ *    in fw. So driver will load a truncated firmware in this case.
+ */
+
+struct uc_css_header {
+	u32 module_type;
+	/*
+	 * header_size includes all non-uCode bits, including css_header, rsa
+	 * key, modulus key and exponent data.
+	 */
+	u32 header_size_dw;
+	u32 header_version;
+	u32 module_id;
+	u32 module_vendor;
+	u32 date;
+#define CSS_DATE_DAY			(0xFF << 0)
+#define CSS_DATE_MONTH			(0xFF << 8)
+#define CSS_DATE_YEAR			(0xFFFF << 16)
+	u32 size_dw; /* uCode plus header_size_dw */
+	u32 key_size_dw;
+	u32 modulus_size_dw;
+	u32 exponent_size_dw;
+	u32 time;
+#define CSS_TIME_HOUR			(0xFF << 0)
+#define CSS_DATE_MIN			(0xFF << 8)
+#define CSS_DATE_SEC			(0xFFFF << 16)
+	char username[8];
+	char buildnumber[12];
+	u32 sw_version;
+#define CSS_SW_VERSION_UC_MAJOR		(0xFF << 16)
+#define CSS_SW_VERSION_UC_MINOR		(0xFF << 8)
+#define CSS_SW_VERSION_UC_PATCH		(0xFF << 0)
+	union {
+		u32 submission_version; /* only applies to GuC */
+		u32 reserved2;
+	};
+	u32 reserved0[12];
+	union {
+		u32 private_data_size; /* only applies to GuC */
+		u32 reserved1;
+	};
+	u32 header_info;
+} __packed;
+static_assert(sizeof(struct uc_css_header) == 128);
+
+/**
+ * DOC: GSC-based Firmware Layout
+ *
+ * The GSC-based firmware structure is used for GSC releases on all platforms
+ * and for HuC releases starting from DG2/MTL. Older HuC releases use the
+ * CSS-based layout instead. Differently from the CSS headers, the GSC headers
+ * uses a directory + entries structure (i.e., there is array of addresses
+ * pointing to specific header extensions identified by a name). Although the
+ * header structures are the same, some of the entries are specific to GSC while
+ * others are specific to HuC. The manifest header entry, which includes basic
+ * information about the binary (like the version) is always present, but it is
+ * named differently based on the binary type.
+ *
+ * The HuC binary starts with a Code Partition Directory (CPD) header. The
+ * entries we're interested in for use in the driver are:
+ *
+ * 1. "HUCP.man": points to the manifest header for the HuC.
+ * 2. "huc_fw": points to the FW code. On platforms that support load via DMA
+ *    and 2-step HuC authentication (i.e. MTL+) this is a full CSS-based binary,
+ *    while if the GSC is the one doing the load (which only happens on DG2)
+ *    this section only contains the uCode.
+ *
+ * The GSC-based HuC firmware layout looks like this::
+ *
+ *	+================================================+
+ *	|  CPD Header                                    |
+ *	+================================================+
+ *	|  CPD entries[]                                 |
+ *	|      entry1                                    |
+ *	|      ...                                       |
+ *	|      entryX                                    |
+ *	|          "HUCP.man"                            |
+ *	|           ...                                  |
+ *	|           offset  >----------------------------|------o
+ *	|      ...                                       |      |
+ *	|      entryY                                    |      |
+ *	|          "huc_fw"                              |      |
+ *	|           ...                                  |      |
+ *	|           offset  >----------------------------|----------o
+ *	+================================================+      |   |
+ *	                                                        |   |
+ *	+================================================+      |   |
+ *	|  Manifest Header                               |<-----o   |
+ *	|      ...                                       |          |
+ *	|      FW version                                |          |
+ *	|      ...                                       |          |
+ *	+================================================+          |
+ *	                                                            |
+ *	+================================================+          |
+ *	|  FW binary                                     |<---------o
+ *	|      CSS (MTL+ only)                           |
+ *	|      uCode                                     |
+ *	|      RSA Key (MTL+ only)                       |
+ *	|      ...                                       |
+ *	+================================================+
+ *
+ * The GSC binary starts instead with a layout header, which contains the
+ * locations of the various partitions of the binary. The one we're interested
+ * in is the boot1 partition, where we can find a BPDT header followed by
+ * entries, one of which points to the RBE sub-section of the partition, which
+ * contains the CPD. The GSC blob does not contain a CSS-based binary, so we
+ * only need to look for the manifest, which is under the "RBEP.man" CPD entry.
+ * Note that we have no need to find where the actual FW code is inside the
+ * image because the GSC ROM will itself parse the headers to find it and load
+ * it.
+ * The GSC firmware header layout looks like this::
+ *
+ *	+================================================+
+ *	|  Layout Pointers                               |
+ *	|      ...                                       |
+ *	|      Boot1 offset  >---------------------------|------o
+ *	|      ...                                       |      |
+ *	+================================================+      |
+ *	                                                        |
+ *	+================================================+      |
+ *	|  BPDT header                                   |<-----o
+ *	+================================================+
+ *	|  BPDT entries[]                                |
+ *	|      entry1                                    |
+ *	|      ...                                       |
+ *	|      entryX                                    |
+ *	|          type == GSC_RBE                       |
+ *	|          offset  >-----------------------------|------o
+ *	|      ...                                       |      |
+ *	+================================================+      |
+ *	                                                        |
+ *	+================================================+      |
+ *	|  CPD Header                                    |<-----o
+ *	+================================================+
+ *	|  CPD entries[]                                 |
+ *	|      entry1                                    |
+ *	|      ...                                       |
+ *	|      entryX                                    |
+ *	|          "RBEP.man"                            |
+ *	|           ...                                  |
+ *	|           offset  >----------------------------|------o
+ *	|      ...                                       |      |
+ *	+================================================+      |
+ *	                                                        |
+ *	+================================================+      |
+ *	| Manifest Header                                |<-----o
+ *	|  ...                                           |
+ *	|  FW version                                    |
+ *	|  ...                                           |
+ *	|  Security version                              |
+ *	|  ...                                           |
+ *	+================================================+
+ */
+
+struct gsc_version {
+	u16 major;
+	u16 minor;
+	u16 hotfix;
+	u16 build;
+} __packed;
+
+struct gsc_partition {
+	u32 offset;
+	u32 size;
+} __packed;
+
+struct gsc_layout_pointers {
+	u8 rom_bypass_vector[16];
+
+	/* size of this header section, not including ROM bypass vector */
+	u16 size;
+
+	/*
+	 * bit0: Backup copy of layout pointers exists
+	 * bits1-15: reserved
+	 */
+	u8 flags;
+
+	u8 reserved;
+
+	u32 crc32;
+
+	struct gsc_partition datap;
+	struct gsc_partition boot1;
+	struct gsc_partition boot2;
+	struct gsc_partition boot3;
+	struct gsc_partition boot4;
+	struct gsc_partition boot5;
+	struct gsc_partition temp_pages;
+} __packed;
+
+/* Boot partition structures */
+struct gsc_bpdt_header {
+	u32 signature;
+#define GSC_BPDT_HEADER_SIGNATURE 0x000055AA
+
+	u16 descriptor_count; /* num of entries after the header */
+
+	u8 version;
+	u8 configuration;
+
+	u32 crc32;
+
+	u32 build_version;
+	struct gsc_version tool_version;
+} __packed;
+
+struct gsc_bpdt_entry {
+	/*
+	 * Bits 0-15: BPDT entry type
+	 * Bits 16-17: reserved
+	 * Bit 18: code sub-partition
+	 * Bits 19-31: reserved
+	 */
+	u32 type;
+#define GSC_BPDT_ENTRY_TYPE_MASK GENMASK(15, 0)
+#define GSC_BPDT_ENTRY_TYPE_GSC_RBE 0x1
+
+	u32 sub_partition_offset; /* from the base of the BPDT header */
+	u32 sub_partition_size;
+} __packed;
+
+/* Code partition directory (CPD) structures */
+struct gsc_cpd_header_v2 {
+	u32 header_marker;
+#define GSC_CPD_HEADER_MARKER 0x44504324
+
+	u32 num_of_entries;
+	u8 header_version;
+	u8 entry_version;
+	u8 header_length; /* in bytes */
+	u8 flags;
+	u32 partition_name;
+	u32 crc32;
+} __packed;
+
+struct gsc_cpd_entry {
+	u8 name[12];
+
+	/*
+	 * Bits 0-24: offset from the beginning of the code partition
+	 * Bit 25: huffman compressed
+	 * Bits 26-31: reserved
+	 */
+	u32 offset;
+#define GSC_CPD_ENTRY_OFFSET_MASK GENMASK(24, 0)
+#define GSC_CPD_ENTRY_HUFFMAN_COMP BIT(25)
+
+	/*
+	 * Module/Item length, in bytes. For Huffman-compressed modules, this
+	 * refers to the uncompressed size. For software-compressed modules,
+	 * this refers to the compressed size.
+	 */
+	u32 length;
+
+	u8 reserved[4];
+} __packed;
+
+struct gsc_manifest_header {
+	u32 header_type; /* 0x4 for manifest type */
+	u32 header_length; /* in dwords */
+	u32 header_version;
+	u32 flags;
+	u32 vendor;
+	u32 date;
+	u32 size; /* In dwords, size of entire manifest (header + extensions) */
+	u32 header_id;
+	u32 internal_data;
+	struct gsc_version fw_version;
+	u32 security_version;
+	struct gsc_version meu_kit_version;
+	u32 meu_manifest_version;
+	u8 general_data[4];
+	u8 reserved3[56];
+	u32 modulus_size; /* in dwords */
+	u32 exponent_size; /* in dwords */
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
new file mode 100644
index 000000000000..ee914a5d8523
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_TYPES_H_
+#define _XE_UC_FW_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/*
+ * +------------+---------------------------------------------------+
+ * |   PHASE    |           FIRMWARE STATUS TRANSITIONS             |
+ * +============+===================================================+
+ * |            |               UNINITIALIZED                       |
+ * +------------+-               /   |   \                         -+
+ * |            |   DISABLED <--/    |    \--> NOT_SUPPORTED        |
+ * | init_early |                    V                              |
+ * |            |                 SELECTED                          |
+ * +------------+-               /   |   \                         -+
+ * |            |    MISSING <--/    |    \--> ERROR                |
+ * |   fetch    |                    V                              |
+ * |            |                 AVAILABLE                         |
+ * +------------+-                   |   \                         -+
+ * |            |                    |    \--> INIT FAIL            |
+ * |   init     |                    V                              |
+ * |            |        /------> LOADABLE <----<-----------\       |
+ * +------------+-       \         /    \        \           \     -+
+ * |            |    LOAD FAIL <--<      \--> TRANSFERRED     \     |
+ * |   upload   |                  \           /   \          /     |
+ * |            |                   \---------/     \--> RUNNING    |
+ * +------------+---------------------------------------------------+
+ */
+
+/*
+ * FIXME: Ported from the i915 and this is state machine is way too complicated.
+ * Circle back and simplify this.
+ */
+enum xe_uc_fw_status {
+	XE_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */
+	XE_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */
+	XE_UC_FIRMWARE_DISABLED, /* disabled */
+	XE_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */
+	XE_UC_FIRMWARE_MISSING, /* blob not found on the system */
+	XE_UC_FIRMWARE_ERROR, /* invalid format or version */
+	XE_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+	XE_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */
+	XE_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
+	XE_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */
+	XE_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
+	XE_UC_FIRMWARE_RUNNING /* init/auth done */
+};
+
+enum xe_uc_fw_type {
+	XE_UC_FW_TYPE_GUC = 0,
+	XE_UC_FW_TYPE_HUC,
+	XE_UC_FW_TYPE_GSC,
+	XE_UC_FW_NUM_TYPES
+};
+
+/**
+ * struct xe_uc_fw_version - Version for XE micro controller firmware
+ */
+struct xe_uc_fw_version {
+	/** @major: major version of the FW */
+	u16 major;
+	/** @minor: minor version of the FW */
+	u16 minor;
+	/** @patch: patch version of the FW */
+	u16 patch;
+	/** @build: build version of the FW (not always available) */
+	u16 build;
+};
+
+enum xe_uc_fw_version_types {
+	XE_UC_FW_VER_RELEASE,
+	XE_UC_FW_VER_COMPATIBILITY,
+	XE_UC_FW_VER_TYPE_COUNT
+};
+
+/**
+ * struct xe_uc_fw - XE micro controller firmware
+ */
+struct xe_uc_fw {
+	/** @type: type uC firmware */
+	enum xe_uc_fw_type type;
+	union {
+		/** @status: firmware load status */
+		const enum xe_uc_fw_status status;
+		/**
+		 * @__status: private firmware load status - only to be used
+		 * by firmware laoding code
+		 */
+		enum xe_uc_fw_status __status;
+	};
+	/** @path: path to uC firmware */
+	const char *path;
+	/** @user_overridden: user provided path to uC firmware via modparam */
+	bool user_overridden;
+	/**
+	 * @full_ver_required: driver still under development and not ready
+	 * for backward-compatible firmware. To be used only for **new**
+	 * platforms, i.e. still under require_force_probe protection and not
+	 * supported by i915.
+	 */
+	bool full_ver_required;
+	/** @size: size of uC firmware including css header */
+	size_t size;
+
+	/** @bo: XE BO for uC firmware */
+	struct xe_bo *bo;
+
+	/** @has_gsc_headers: whether the FW image starts with GSC headers */
+	bool has_gsc_headers;
+
+	/*
+	 * The firmware build process will generate a version header file with
+	 * major and minor version defined. The versions are built into CSS
+	 * header of firmware. The xe kernel driver set the minimal firmware
+	 * version required per platform.
+	 */
+
+	/** @versions: FW versions wanted and found */
+	struct {
+		/** @wanted: firmware version wanted by platform */
+		struct xe_uc_fw_version wanted;
+		/** @wanted_type: type of firmware version wanted (release vs compatibility) */
+		enum xe_uc_fw_version_types wanted_type;
+		/** @found: fw versions found in firmware blob */
+		struct xe_uc_fw_version found[XE_UC_FW_VER_TYPE_COUNT];
+	} versions;
+
+	/** @rsa_size: RSA size */
+	u32 rsa_size;
+	/** @ucode_size: micro kernel size */
+	u32 ucode_size;
+	/** @css_offset: offset within the blob at which the CSS is located */
+	u32 css_offset;
+
+	/** @private_data_size: size of private data found in uC css header */
+	u32 private_data_size;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h
new file mode 100644
index 000000000000..9924e4484866
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_TYPES_H_
+#define _XE_UC_TYPES_H_
+
+#include "xe_gsc_types.h"
+#include "xe_guc_types.h"
+#include "xe_huc_types.h"
+#include "xe_wopcm_types.h"
+
+/**
+ * struct xe_uc - XE micro controllers
+ */
+struct xe_uc {
+	/** @guc: Graphics micro controller */
+	struct xe_guc guc;
+	/** @huc: HuC */
+	struct xe_huc huc;
+	/** @gsc: Graphics Security Controller */
+	struct xe_gsc gsc;
+	/** @wopcm: WOPCM */
+	struct xe_wopcm wopcm;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
new file mode 100644
index 000000000000..0cfe7289b97e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -0,0 +1,3209 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_vm.h"
+
+#include <linux/dma-fence-array.h>
+#include <linux/nospec.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_print.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_drm_client.h"
+#include "xe_exec_queue.h"
+#include "xe_gt.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_migrate.h"
+#include "xe_pat.h"
+#include "xe_pm.h"
+#include "xe_preempt_fence.h"
+#include "xe_pt.h"
+#include "xe_res_cursor.h"
+#include "xe_sync.h"
+#include "xe_trace.h"
+#include "generated/xe_wa_oob.h"
+#include "xe_wa.h"
+
+#define TEST_VM_ASYNC_OPS_ERROR
+
+static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
+{
+	return vm->gpuvm.r_obj;
+}
+
+/**
+ * xe_vma_userptr_check_repin() - Advisory check for repin needed
+ * @vma: The userptr vma
+ *
+ * Check if the userptr vma has been invalidated since last successful
+ * repin. The check is advisory only and can the function can be called
+ * without the vm->userptr.notifier_lock held. There is no guarantee that the
+ * vma userptr will remain valid after a lockless check, so typically
+ * the call needs to be followed by a proper check under the notifier_lock.
+ *
+ * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
+ */
+int xe_vma_userptr_check_repin(struct xe_vma *vma)
+{
+	return mmu_interval_check_retry(&vma->userptr.notifier,
+					vma->userptr.notifier_seq) ?
+		-EAGAIN : 0;
+}
+
+int xe_vma_userptr_pin_pages(struct xe_vma *vma)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_device *xe = vm->xe;
+	const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
+	struct page **pages;
+	bool in_kthread = !current->mm;
+	unsigned long notifier_seq;
+	int pinned, ret, i;
+	bool read_only = xe_vma_read_only(vma);
+
+	lockdep_assert_held(&vm->lock);
+	xe_assert(xe, xe_vma_is_userptr(vma));
+retry:
+	if (vma->gpuva.flags & XE_VMA_DESTROYED)
+		return 0;
+
+	notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
+	if (notifier_seq == vma->userptr.notifier_seq)
+		return 0;
+
+	pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	if (vma->userptr.sg) {
+		dma_unmap_sgtable(xe->drm.dev,
+				  vma->userptr.sg,
+				  read_only ? DMA_TO_DEVICE :
+				  DMA_BIDIRECTIONAL, 0);
+		sg_free_table(vma->userptr.sg);
+		vma->userptr.sg = NULL;
+	}
+
+	pinned = ret = 0;
+	if (in_kthread) {
+		if (!mmget_not_zero(vma->userptr.notifier.mm)) {
+			ret = -EFAULT;
+			goto mm_closed;
+		}
+		kthread_use_mm(vma->userptr.notifier.mm);
+	}
+
+	while (pinned < num_pages) {
+		ret = get_user_pages_fast(xe_vma_userptr(vma) +
+					  pinned * PAGE_SIZE,
+					  num_pages - pinned,
+					  read_only ? 0 : FOLL_WRITE,
+					  &pages[pinned]);
+		if (ret < 0) {
+			if (in_kthread)
+				ret = 0;
+			break;
+		}
+
+		pinned += ret;
+		ret = 0;
+	}
+
+	if (in_kthread) {
+		kthread_unuse_mm(vma->userptr.notifier.mm);
+		mmput(vma->userptr.notifier.mm);
+	}
+mm_closed:
+	if (ret)
+		goto out;
+
+	ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages,
+						pinned, 0,
+						(u64)pinned << PAGE_SHIFT,
+						xe_sg_segment_size(xe->drm.dev),
+						GFP_KERNEL);
+	if (ret) {
+		vma->userptr.sg = NULL;
+		goto out;
+	}
+	vma->userptr.sg = &vma->userptr.sgt;
+
+	ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg,
+			      read_only ? DMA_TO_DEVICE :
+			      DMA_BIDIRECTIONAL,
+			      DMA_ATTR_SKIP_CPU_SYNC |
+			      DMA_ATTR_NO_KERNEL_MAPPING);
+	if (ret) {
+		sg_free_table(vma->userptr.sg);
+		vma->userptr.sg = NULL;
+		goto out;
+	}
+
+	for (i = 0; i < pinned; ++i) {
+		if (!read_only) {
+			lock_page(pages[i]);
+			set_page_dirty(pages[i]);
+			unlock_page(pages[i]);
+		}
+
+		mark_page_accessed(pages[i]);
+	}
+
+out:
+	release_pages(pages, pinned);
+	kvfree(pages);
+
+	if (!(ret < 0)) {
+		vma->userptr.notifier_seq = notifier_seq;
+		if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
+			goto retry;
+	}
+
+	return ret < 0 ? ret : 0;
+}
+
+static bool preempt_fences_waiting(struct xe_vm *vm)
+{
+	struct xe_exec_queue *q;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		if (!q->compute.pfence ||
+		    (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+						   &q->compute.pfence->flags))) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static void free_preempt_fences(struct list_head *list)
+{
+	struct list_head *link, *next;
+
+	list_for_each_safe(link, next, list)
+		xe_preempt_fence_free(to_preempt_fence_from_link(link));
+}
+
+static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
+				unsigned int *count)
+{
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	if (*count >= vm->preempt.num_exec_queues)
+		return 0;
+
+	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
+		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
+
+		if (IS_ERR(pfence))
+			return PTR_ERR(pfence);
+
+		list_move_tail(xe_preempt_fence_link(pfence), list);
+	}
+
+	return 0;
+}
+
+static int wait_for_existing_preempt_fences(struct xe_vm *vm)
+{
+	struct xe_exec_queue *q;
+
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		if (q->compute.pfence) {
+			long timeout = dma_fence_wait(q->compute.pfence, false);
+
+			if (timeout < 0)
+				return -ETIME;
+			dma_fence_put(q->compute.pfence);
+			q->compute.pfence = NULL;
+		}
+	}
+
+	return 0;
+}
+
+static bool xe_vm_is_idle(struct xe_vm *vm)
+{
+	struct xe_exec_queue *q;
+
+	xe_vm_assert_held(vm);
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		if (!xe_exec_queue_is_idle(q))
+			return false;
+	}
+
+	return true;
+}
+
+static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
+{
+	struct list_head *link;
+	struct xe_exec_queue *q;
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		struct dma_fence *fence;
+
+		link = list->next;
+		xe_assert(vm->xe, link != list);
+
+		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
+					     q, q->compute.context,
+					     ++q->compute.seqno);
+		dma_fence_put(q->compute.pfence);
+		q->compute.pfence = fence;
+	}
+}
+
+static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
+{
+	struct xe_exec_queue *q;
+	int err;
+
+	if (!vm->preempt.num_exec_queues)
+		return 0;
+
+	err = xe_bo_lock(bo, true);
+	if (err)
+		return err;
+
+	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
+	if (err)
+		goto out_unlock;
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
+		if (q->compute.pfence) {
+			dma_resv_add_fence(bo->ttm.base.resv,
+					   q->compute.pfence,
+					   DMA_RESV_USAGE_BOOKKEEP);
+		}
+
+out_unlock:
+	xe_bo_unlock(bo);
+	return err;
+}
+
+static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
+						struct drm_exec *exec)
+{
+	struct xe_exec_queue *q;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
+		q->ops->resume(q);
+
+		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence,
+					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
+	}
+}
+
+int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	struct drm_gpuvm_exec vm_exec = {
+		.vm = &vm->gpuvm,
+		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
+		.num_fences = 1,
+	};
+	struct drm_exec *exec = &vm_exec.exec;
+	struct dma_fence *pfence;
+	int err;
+	bool wait;
+
+	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
+
+	down_write(&vm->lock);
+	err = drm_gpuvm_exec_lock(&vm_exec);
+	if (err)
+		return err;
+
+	pfence = xe_preempt_fence_create(q, q->compute.context,
+					 ++q->compute.seqno);
+	if (!pfence) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+
+	list_add(&q->compute.link, &vm->preempt.exec_queues);
+	++vm->preempt.num_exec_queues;
+	q->compute.pfence = pfence;
+
+	down_read(&vm->userptr.notifier_lock);
+
+	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
+				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
+
+	/*
+	 * Check to see if a preemption on VM is in flight or userptr
+	 * invalidation, if so trigger this preempt fence to sync state with
+	 * other preempt fences on the VM.
+	 */
+	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
+	if (wait)
+		dma_fence_enable_sw_signaling(pfence);
+
+	up_read(&vm->userptr.notifier_lock);
+
+out_unlock:
+	drm_exec_fini(exec);
+	up_write(&vm->lock);
+
+	return err;
+}
+
+/**
+ * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
+ * @vm: The VM.
+ * @q: The exec_queue
+ */
+void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	if (!xe_vm_in_preempt_fence_mode(vm))
+		return;
+
+	down_write(&vm->lock);
+	list_del(&q->compute.link);
+	--vm->preempt.num_exec_queues;
+	if (q->compute.pfence) {
+		dma_fence_enable_sw_signaling(q->compute.pfence);
+		dma_fence_put(q->compute.pfence);
+		q->compute.pfence = NULL;
+	}
+	up_write(&vm->lock);
+}
+
+/**
+ * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function checks for whether the VM has userptrs that need repinning,
+ * and provides a release-type barrier on the userptr.notifier_lock after
+ * checking.
+ *
+ * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
+ */
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
+{
+	lockdep_assert_held_read(&vm->userptr.notifier_lock);
+
+	return (list_empty(&vm->userptr.repin_list) &&
+		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
+}
+
+#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
+
+static void xe_vm_kill(struct xe_vm *vm)
+{
+	struct xe_exec_queue *q;
+
+	lockdep_assert_held(&vm->lock);
+
+	xe_vm_lock(vm, false);
+	vm->flags |= XE_VM_FLAG_BANNED;
+	trace_xe_vm_kill(vm);
+
+	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
+		q->ops->kill(q);
+	xe_vm_unlock(vm);
+
+	/* TODO: Inform user the VM is banned */
+}
+
+/**
+ * xe_vm_validate_should_retry() - Whether to retry after a validate error.
+ * @exec: The drm_exec object used for locking before validation.
+ * @err: The error returned from ttm_bo_validate().
+ * @end: A ktime_t cookie that should be set to 0 before first use and
+ * that should be reused on subsequent calls.
+ *
+ * With multiple active VMs, under memory pressure, it is possible that
+ * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
+ * Until ttm properly handles locking in such scenarios, best thing the
+ * driver can do is retry with a timeout. Check if that is necessary, and
+ * if so unlock the drm_exec's objects while keeping the ticket to prepare
+ * for a rerun.
+ *
+ * Return: true if a retry after drm_exec_init() is recommended;
+ * false otherwise.
+ */
+bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
+{
+	ktime_t cur;
+
+	if (err != -ENOMEM)
+		return false;
+
+	cur = ktime_get();
+	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
+	if (!ktime_before(cur, *end))
+		return false;
+
+	msleep(20);
+	return true;
+}
+
+static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
+{
+	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
+	struct drm_gpuva *gpuva;
+	int ret;
+
+	lockdep_assert_held(&vm->lock);
+	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
+		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
+			       &vm->rebind_list);
+
+	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
+	if (ret)
+		return ret;
+
+	vm_bo->evicted = false;
+	return 0;
+}
+
+static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
+				 bool *done)
+{
+	int err;
+
+	/*
+	 * 1 fence for each preempt fence plus a fence for each tile from a
+	 * possible rebind
+	 */
+	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
+				   vm->xe->info.tile_count);
+	if (err)
+		return err;
+
+	if (xe_vm_is_idle(vm)) {
+		vm->preempt.rebind_deactivated = true;
+		*done = true;
+		return 0;
+	}
+
+	if (!preempt_fences_waiting(vm)) {
+		*done = true;
+		return 0;
+	}
+
+	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
+	if (err)
+		return err;
+
+	err = wait_for_existing_preempt_fences(vm);
+	if (err)
+		return err;
+
+	return drm_gpuvm_validate(&vm->gpuvm, exec);
+}
+
+static void preempt_rebind_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
+	struct drm_exec exec;
+	struct dma_fence *rebind_fence;
+	unsigned int fence_count = 0;
+	LIST_HEAD(preempt_fences);
+	ktime_t end = 0;
+	int err = 0;
+	long wait;
+	int __maybe_unused tries = 0;
+
+	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
+	trace_xe_vm_rebind_worker_enter(vm);
+
+	down_write(&vm->lock);
+
+	if (xe_vm_is_closed_or_banned(vm)) {
+		up_write(&vm->lock);
+		trace_xe_vm_rebind_worker_exit(vm);
+		return;
+	}
+
+retry:
+	if (xe_vm_userptr_check_repin(vm)) {
+		err = xe_vm_userptr_pin(vm);
+		if (err)
+			goto out_unlock_outer;
+	}
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+
+	drm_exec_until_all_locked(&exec) {
+		bool done = false;
+
+		err = xe_preempt_work_begin(&exec, vm, &done);
+		drm_exec_retry_on_contention(&exec);
+		if (err || done) {
+			drm_exec_fini(&exec);
+			if (err && xe_vm_validate_should_retry(&exec, err, &end))
+				err = -EAGAIN;
+
+			goto out_unlock_outer;
+		}
+	}
+
+	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
+	if (err)
+		goto out_unlock;
+
+	rebind_fence = xe_vm_rebind(vm, true);
+	if (IS_ERR(rebind_fence)) {
+		err = PTR_ERR(rebind_fence);
+		goto out_unlock;
+	}
+
+	if (rebind_fence) {
+		dma_fence_wait(rebind_fence, false);
+		dma_fence_put(rebind_fence);
+	}
+
+	/* Wait on munmap style VM unbinds */
+	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
+				     DMA_RESV_USAGE_KERNEL,
+				     false, MAX_SCHEDULE_TIMEOUT);
+	if (wait <= 0) {
+		err = -ETIME;
+		goto out_unlock;
+	}
+
+#define retry_required(__tries, __vm) \
+	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
+	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
+	__xe_vm_userptr_needs_repin(__vm))
+
+	down_read(&vm->userptr.notifier_lock);
+	if (retry_required(tries, vm)) {
+		up_read(&vm->userptr.notifier_lock);
+		err = -EAGAIN;
+		goto out_unlock;
+	}
+
+#undef retry_required
+
+	spin_lock(&vm->xe->ttm.lru_lock);
+	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+	spin_unlock(&vm->xe->ttm.lru_lock);
+
+	/* Point of no return. */
+	arm_preempt_fences(vm, &preempt_fences);
+	resume_and_reinstall_preempt_fences(vm, &exec);
+	up_read(&vm->userptr.notifier_lock);
+
+out_unlock:
+	drm_exec_fini(&exec);
+out_unlock_outer:
+	if (err == -EAGAIN) {
+		trace_xe_vm_rebind_worker_retry(vm);
+		goto retry;
+	}
+
+	if (err) {
+		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
+		xe_vm_kill(vm);
+	}
+	up_write(&vm->lock);
+
+	free_preempt_fences(&preempt_fences);
+
+	trace_xe_vm_rebind_worker_exit(vm);
+}
+
+static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
+				   const struct mmu_notifier_range *range,
+				   unsigned long cur_seq)
+{
+	struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	long err;
+
+	xe_assert(vm->xe, xe_vma_is_userptr(vma));
+	trace_xe_vma_userptr_invalidate(vma);
+
+	if (!mmu_notifier_range_blockable(range))
+		return false;
+
+	down_write(&vm->userptr.notifier_lock);
+	mmu_interval_set_seq(mni, cur_seq);
+
+	/* No need to stop gpu access if the userptr is not yet bound. */
+	if (!vma->userptr.initial_bind) {
+		up_write(&vm->userptr.notifier_lock);
+		return true;
+	}
+
+	/*
+	 * Tell exec and rebind worker they need to repin and rebind this
+	 * userptr.
+	 */
+	if (!xe_vm_in_fault_mode(vm) &&
+	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_move_tail(&vma->userptr.invalidate_link,
+			       &vm->userptr.invalidated);
+		spin_unlock(&vm->userptr.invalidated_lock);
+	}
+
+	up_write(&vm->userptr.notifier_lock);
+
+	/*
+	 * Preempt fences turn into schedule disables, pipeline these.
+	 * Note that even in fault mode, we need to wait for binds and
+	 * unbinds to complete, and those are attached as BOOKMARK fences
+	 * to the vm.
+	 */
+	dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
+			    DMA_RESV_USAGE_BOOKKEEP);
+	dma_resv_for_each_fence_unlocked(&cursor, fence)
+		dma_fence_enable_sw_signaling(fence);
+	dma_resv_iter_end(&cursor);
+
+	err = dma_resv_wait_timeout(xe_vm_resv(vm),
+				    DMA_RESV_USAGE_BOOKKEEP,
+				    false, MAX_SCHEDULE_TIMEOUT);
+	XE_WARN_ON(err <= 0);
+
+	if (xe_vm_in_fault_mode(vm)) {
+		err = xe_vm_invalidate_vma(vma);
+		XE_WARN_ON(err);
+	}
+
+	trace_xe_vma_userptr_invalidate_complete(vma);
+
+	return true;
+}
+
+static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
+	.invalidate = vma_userptr_invalidate,
+};
+
+int xe_vm_userptr_pin(struct xe_vm *vm)
+{
+	struct xe_vma *vma, *next;
+	int err = 0;
+	LIST_HEAD(tmp_evict);
+
+	lockdep_assert_held_write(&vm->lock);
+
+	/* Collect invalidated userptrs */
+	spin_lock(&vm->userptr.invalidated_lock);
+	list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
+				 userptr.invalidate_link) {
+		list_del_init(&vma->userptr.invalidate_link);
+		list_move_tail(&vma->combined_links.userptr,
+			       &vm->userptr.repin_list);
+	}
+	spin_unlock(&vm->userptr.invalidated_lock);
+
+	/* Pin and move to temporary list */
+	list_for_each_entry_safe(vma, next, &vm->userptr.repin_list,
+				 combined_links.userptr) {
+		err = xe_vma_userptr_pin_pages(vma);
+		if (err < 0)
+			return err;
+
+		list_move_tail(&vma->combined_links.userptr, &vm->rebind_list);
+	}
+
+	return 0;
+}
+
+/**
+ * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function does an advisory check for whether the VM has userptrs that
+ * need repinning.
+ *
+ * Return: 0 if there are no indications of userptrs needing repinning,
+ * -EAGAIN if there are.
+ */
+int xe_vm_userptr_check_repin(struct xe_vm *vm)
+{
+	return (list_empty_careful(&vm->userptr.repin_list) &&
+		list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
+}
+
+static struct dma_fence *
+xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
+	       struct xe_sync_entry *syncs, u32 num_syncs,
+	       bool first_op, bool last_op);
+
+struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
+{
+	struct dma_fence *fence = NULL;
+	struct xe_vma *vma, *next;
+
+	lockdep_assert_held(&vm->lock);
+	if (xe_vm_in_lr_mode(vm) && !rebind_worker)
+		return NULL;
+
+	xe_vm_assert_held(vm);
+	list_for_each_entry_safe(vma, next, &vm->rebind_list,
+				 combined_links.rebind) {
+		xe_assert(vm->xe, vma->tile_present);
+
+		list_del_init(&vma->combined_links.rebind);
+		dma_fence_put(fence);
+		if (rebind_worker)
+			trace_xe_vma_rebind_worker(vma);
+		else
+			trace_xe_vma_rebind_exec(vma);
+		fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
+		if (IS_ERR(fence))
+			return fence;
+	}
+
+	return fence;
+}
+
+#define VMA_CREATE_FLAG_READ_ONLY	BIT(0)
+#define VMA_CREATE_FLAG_IS_NULL		BIT(1)
+
+static struct xe_vma *xe_vma_create(struct xe_vm *vm,
+				    struct xe_bo *bo,
+				    u64 bo_offset_or_userptr,
+				    u64 start, u64 end,
+				    u16 pat_index, unsigned int flags)
+{
+	struct xe_vma *vma;
+	struct xe_tile *tile;
+	u8 id;
+	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
+	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
+
+	xe_assert(vm->xe, start < end);
+	xe_assert(vm->xe, end < vm->size);
+
+	if (!bo && !is_null)	/* userptr */
+		vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	else
+		vma = kzalloc(sizeof(*vma) - sizeof(struct xe_userptr),
+			      GFP_KERNEL);
+	if (!vma) {
+		vma = ERR_PTR(-ENOMEM);
+		return vma;
+	}
+
+	INIT_LIST_HEAD(&vma->combined_links.rebind);
+
+	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
+	vma->gpuva.vm = &vm->gpuvm;
+	vma->gpuva.va.addr = start;
+	vma->gpuva.va.range = end - start + 1;
+	if (read_only)
+		vma->gpuva.flags |= XE_VMA_READ_ONLY;
+	if (is_null)
+		vma->gpuva.flags |= DRM_GPUVA_SPARSE;
+
+	for_each_tile(tile, vm->xe, id)
+		vma->tile_mask |= 0x1 << id;
+
+	if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
+		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
+
+	vma->pat_index = pat_index;
+
+	if (bo) {
+		struct drm_gpuvm_bo *vm_bo;
+
+		xe_bo_assert_held(bo);
+
+		vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
+		if (IS_ERR(vm_bo)) {
+			kfree(vma);
+			return ERR_CAST(vm_bo);
+		}
+
+		drm_gpuvm_bo_extobj_add(vm_bo);
+		drm_gem_object_get(&bo->ttm.base);
+		vma->gpuva.gem.obj = &bo->ttm.base;
+		vma->gpuva.gem.offset = bo_offset_or_userptr;
+		drm_gpuva_link(&vma->gpuva, vm_bo);
+		drm_gpuvm_bo_put(vm_bo);
+	} else /* userptr or null */ {
+		if (!is_null) {
+			u64 size = end - start + 1;
+			int err;
+
+			INIT_LIST_HEAD(&vma->userptr.invalidate_link);
+			vma->gpuva.gem.offset = bo_offset_or_userptr;
+
+			err = mmu_interval_notifier_insert(&vma->userptr.notifier,
+							   current->mm,
+							   xe_vma_userptr(vma), size,
+							   &vma_userptr_notifier_ops);
+			if (err) {
+				kfree(vma);
+				vma = ERR_PTR(err);
+				return vma;
+			}
+
+			vma->userptr.notifier_seq = LONG_MAX;
+		}
+
+		xe_vm_get(vm);
+	}
+
+	return vma;
+}
+
+static void xe_vma_destroy_late(struct xe_vma *vma)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_device *xe = vm->xe;
+	bool read_only = xe_vma_read_only(vma);
+
+	if (xe_vma_is_userptr(vma)) {
+		if (vma->userptr.sg) {
+			dma_unmap_sgtable(xe->drm.dev,
+					  vma->userptr.sg,
+					  read_only ? DMA_TO_DEVICE :
+					  DMA_BIDIRECTIONAL, 0);
+			sg_free_table(vma->userptr.sg);
+			vma->userptr.sg = NULL;
+		}
+
+		/*
+		 * Since userptr pages are not pinned, we can't remove
+		 * the notifer until we're sure the GPU is not accessing
+		 * them anymore
+		 */
+		mmu_interval_notifier_remove(&vma->userptr.notifier);
+		xe_vm_put(vm);
+	} else if (xe_vma_is_null(vma)) {
+		xe_vm_put(vm);
+	} else {
+		xe_bo_put(xe_vma_bo(vma));
+	}
+
+	kfree(vma);
+}
+
+static void vma_destroy_work_func(struct work_struct *w)
+{
+	struct xe_vma *vma =
+		container_of(w, struct xe_vma, destroy_work);
+
+	xe_vma_destroy_late(vma);
+}
+
+static void vma_destroy_cb(struct dma_fence *fence,
+			   struct dma_fence_cb *cb)
+{
+	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
+
+	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
+	queue_work(system_unbound_wq, &vma->destroy_work);
+}
+
+static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+
+	lockdep_assert_held_write(&vm->lock);
+	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
+
+	if (xe_vma_is_userptr(vma)) {
+		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
+
+		spin_lock(&vm->userptr.invalidated_lock);
+		list_del(&vma->userptr.invalidate_link);
+		spin_unlock(&vm->userptr.invalidated_lock);
+	} else if (!xe_vma_is_null(vma)) {
+		xe_bo_assert_held(xe_vma_bo(vma));
+
+		drm_gpuva_unlink(&vma->gpuva);
+	}
+
+	xe_vm_assert_held(vm);
+	if (fence) {
+		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
+						 vma_destroy_cb);
+
+		if (ret) {
+			XE_WARN_ON(ret != -ENOENT);
+			xe_vma_destroy_late(vma);
+		}
+	} else {
+		xe_vma_destroy_late(vma);
+	}
+}
+
+/**
+ * xe_vm_prepare_vma() - drm_exec utility to lock a vma
+ * @exec: The drm_exec object we're currently locking for.
+ * @vma: The vma for witch we want to lock the vm resv and any attached
+ * object's resv.
+ * @num_shared: The number of dma-fence slots to pre-allocate in the
+ * objects' reservation objects.
+ *
+ * Return: 0 on success, negative error code on error. In particular
+ * may return -EDEADLK on WW transaction contention and -EINTR if
+ * an interruptible wait is terminated by a signal.
+ */
+int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
+		      unsigned int num_shared)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_bo *bo = xe_vma_bo(vma);
+	int err;
+
+	XE_WARN_ON(!vm);
+	err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
+	if (!err && bo && !bo->vm)
+		err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
+
+	return err;
+}
+
+static void xe_vma_destroy_unlocked(struct xe_vma *vma)
+{
+	struct drm_exec exec;
+	int err;
+
+	drm_exec_init(&exec, 0, 0);
+	drm_exec_until_all_locked(&exec) {
+		err = xe_vm_prepare_vma(&exec, vma, 0);
+		drm_exec_retry_on_contention(&exec);
+		if (XE_WARN_ON(err))
+			break;
+	}
+
+	xe_vma_destroy(vma, NULL);
+
+	drm_exec_fini(&exec);
+}
+
+struct xe_vma *
+xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
+{
+	struct drm_gpuva *gpuva;
+
+	lockdep_assert_held(&vm->lock);
+
+	if (xe_vm_is_closed_or_banned(vm))
+		return NULL;
+
+	xe_assert(vm->xe, start + range <= vm->size);
+
+	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
+
+	return gpuva ? gpuva_to_vma(gpuva) : NULL;
+}
+
+static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	int err;
+
+	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
+	lockdep_assert_held(&vm->lock);
+
+	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
+	XE_WARN_ON(err);	/* Shouldn't be possible */
+
+	return err;
+}
+
+static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
+	lockdep_assert_held(&vm->lock);
+
+	drm_gpuva_remove(&vma->gpuva);
+	if (vm->usm.last_fault_vma == vma)
+		vm->usm.last_fault_vma = NULL;
+}
+
+static struct drm_gpuva_op *xe_vm_op_alloc(void)
+{
+	struct xe_vma_op *op;
+
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+
+	if (unlikely(!op))
+		return NULL;
+
+	return &op->base;
+}
+
+static void xe_vm_free(struct drm_gpuvm *gpuvm);
+
+static struct drm_gpuvm_ops gpuvm_ops = {
+	.op_alloc = xe_vm_op_alloc,
+	.vm_bo_validate = xe_gpuvm_validate,
+	.vm_free = xe_vm_free,
+};
+
+static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
+{
+	u64 pte = 0;
+
+	if (pat_index & BIT(0))
+		pte |= XE_PPGTT_PTE_PAT0;
+
+	if (pat_index & BIT(1))
+		pte |= XE_PPGTT_PTE_PAT1;
+
+	return pte;
+}
+
+static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
+				u32 pt_level)
+{
+	u64 pte = 0;
+
+	if (pat_index & BIT(0))
+		pte |= XE_PPGTT_PTE_PAT0;
+
+	if (pat_index & BIT(1))
+		pte |= XE_PPGTT_PTE_PAT1;
+
+	if (pat_index & BIT(2)) {
+		if (pt_level)
+			pte |= XE_PPGTT_PDE_PDPE_PAT2;
+		else
+			pte |= XE_PPGTT_PTE_PAT2;
+	}
+
+	if (pat_index & BIT(3))
+		pte |= XELPG_PPGTT_PTE_PAT3;
+
+	if (pat_index & (BIT(4)))
+		pte |= XE2_PPGTT_PTE_PAT4;
+
+	return pte;
+}
+
+static u64 pte_encode_ps(u32 pt_level)
+{
+	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
+
+	if (pt_level == 1)
+		return XE_PDE_PS_2M;
+	else if (pt_level == 2)
+		return XE_PDPE_PS_1G;
+
+	return 0;
+}
+
+static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
+			      const u16 pat_index)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	u64 pde;
+
+	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
+	pde |= pde_encode_pat_index(xe, pat_index);
+
+	return pde;
+}
+
+static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
+			      u16 pat_index, u32 pt_level)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	u64 pte;
+
+	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
+	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
+	pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+	pte |= pte_encode_ps(pt_level);
+
+	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
+		pte |= XE_PPGTT_PTE_DM;
+
+	return pte;
+}
+
+static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
+			       u16 pat_index, u32 pt_level)
+{
+	struct xe_device *xe = xe_vma_vm(vma)->xe;
+
+	pte |= XE_PAGE_PRESENT;
+
+	if (likely(!xe_vma_read_only(vma)))
+		pte |= XE_PAGE_RW;
+
+	pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+	pte |= pte_encode_ps(pt_level);
+
+	if (unlikely(xe_vma_is_null(vma)))
+		pte |= XE_PTE_NULL;
+
+	return pte;
+}
+
+static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
+				u16 pat_index,
+				u32 pt_level, bool devmem, u64 flags)
+{
+	u64 pte;
+
+	/* Avoid passing random bits directly as flags */
+	xe_assert(xe, !(flags & ~XE_PTE_PS64));
+
+	pte = addr;
+	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
+	pte |= pte_encode_pat_index(xe, pat_index, pt_level);
+	pte |= pte_encode_ps(pt_level);
+
+	if (devmem)
+		pte |= XE_PPGTT_PTE_DM;
+
+	pte |= flags;
+
+	return pte;
+}
+
+static const struct xe_pt_ops xelp_pt_ops = {
+	.pte_encode_bo = xelp_pte_encode_bo,
+	.pte_encode_vma = xelp_pte_encode_vma,
+	.pte_encode_addr = xelp_pte_encode_addr,
+	.pde_encode_bo = xelp_pde_encode_bo,
+};
+
+static void vm_destroy_work_func(struct work_struct *w);
+
+/**
+ * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
+ * given tile and vm.
+ * @xe: xe device.
+ * @tile: tile to set up for.
+ * @vm: vm to set up for.
+ *
+ * Sets up a pagetable tree with one page-table per level and a single
+ * leaf PTE. All pagetable entries point to the single page-table or,
+ * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
+ * writes become NOPs.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm)
+{
+	u8 id = tile->id;
+	int i;
+
+	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
+		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
+		if (IS_ERR(vm->scratch_pt[id][i]))
+			return PTR_ERR(vm->scratch_pt[id][i]);
+
+		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
+	}
+
+	return 0;
+}
+
+static void xe_vm_free_scratch(struct xe_vm *vm)
+{
+	struct xe_tile *tile;
+	u8 id;
+
+	if (!xe_vm_has_scratch(vm))
+		return;
+
+	for_each_tile(tile, vm->xe, id) {
+		u32 i;
+
+		if (!vm->pt_root[id])
+			continue;
+
+		for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
+			if (vm->scratch_pt[id][i])
+				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
+	}
+}
+
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
+{
+	struct drm_gem_object *vm_resv_obj;
+	struct xe_vm *vm;
+	int err, number_tiles = 0;
+	struct xe_tile *tile;
+	u8 id;
+
+	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+	if (!vm)
+		return ERR_PTR(-ENOMEM);
+
+	vm->xe = xe;
+
+	vm->size = 1ull << xe->info.va_bits;
+
+	vm->flags = flags;
+
+	init_rwsem(&vm->lock);
+
+	INIT_LIST_HEAD(&vm->rebind_list);
+
+	INIT_LIST_HEAD(&vm->userptr.repin_list);
+	INIT_LIST_HEAD(&vm->userptr.invalidated);
+	init_rwsem(&vm->userptr.notifier_lock);
+	spin_lock_init(&vm->userptr.invalidated_lock);
+
+	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
+
+	INIT_LIST_HEAD(&vm->preempt.exec_queues);
+	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */
+
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_init(&vm->rftree[id]);
+
+	vm->pt_ops = &xelp_pt_ops;
+
+	if (!(flags & XE_VM_FLAG_MIGRATION))
+		xe_device_mem_access_get(xe);
+
+	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
+	if (!vm_resv_obj) {
+		err = -ENOMEM;
+		goto err_no_resv;
+	}
+
+	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
+		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
+
+	drm_gem_object_put(vm_resv_obj);
+
+	err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
+	if (err)
+		goto err_close;
+
+	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+		vm->flags |= XE_VM_FLAG_64K;
+
+	for_each_tile(tile, xe, id) {
+		if (flags & XE_VM_FLAG_MIGRATION &&
+		    tile->id != XE_VM_FLAG_TILE_ID(flags))
+			continue;
+
+		vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
+		if (IS_ERR(vm->pt_root[id])) {
+			err = PTR_ERR(vm->pt_root[id]);
+			vm->pt_root[id] = NULL;
+			goto err_unlock_close;
+		}
+	}
+
+	if (xe_vm_has_scratch(vm)) {
+		for_each_tile(tile, xe, id) {
+			if (!vm->pt_root[id])
+				continue;
+
+			err = xe_vm_create_scratch(xe, tile, vm);
+			if (err)
+				goto err_unlock_close;
+		}
+		vm->batch_invalidate_tlb = true;
+	}
+
+	if (flags & XE_VM_FLAG_LR_MODE) {
+		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
+		vm->flags |= XE_VM_FLAG_LR_MODE;
+		vm->batch_invalidate_tlb = false;
+	}
+
+	/* Fill pt_root after allocating scratch tables */
+	for_each_tile(tile, xe, id) {
+		if (!vm->pt_root[id])
+			continue;
+
+		xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
+	}
+	dma_resv_unlock(xe_vm_resv(vm));
+
+	/* Kernel migration VM shouldn't have a circular loop.. */
+	if (!(flags & XE_VM_FLAG_MIGRATION)) {
+		for_each_tile(tile, xe, id) {
+			struct xe_gt *gt = tile->primary_gt;
+			struct xe_vm *migrate_vm;
+			struct xe_exec_queue *q;
+			u32 create_flags = EXEC_QUEUE_FLAG_VM;
+
+			if (!vm->pt_root[id])
+				continue;
+
+			migrate_vm = xe_migrate_get_vm(tile->migrate);
+			q = xe_exec_queue_create_class(xe, gt, migrate_vm,
+						       XE_ENGINE_CLASS_COPY,
+						       create_flags);
+			xe_vm_put(migrate_vm);
+			if (IS_ERR(q)) {
+				err = PTR_ERR(q);
+				goto err_close;
+			}
+			vm->q[id] = q;
+			number_tiles++;
+		}
+	}
+
+	if (number_tiles > 1)
+		vm->composite_fence_ctx = dma_fence_context_alloc(1);
+
+	mutex_lock(&xe->usm.lock);
+	if (flags & XE_VM_FLAG_FAULT_MODE)
+		xe->usm.num_vm_in_fault_mode++;
+	else if (!(flags & XE_VM_FLAG_MIGRATION))
+		xe->usm.num_vm_in_non_fault_mode++;
+	mutex_unlock(&xe->usm.lock);
+
+	trace_xe_vm_create(vm);
+
+	return vm;
+
+err_unlock_close:
+	dma_resv_unlock(xe_vm_resv(vm));
+err_close:
+	xe_vm_close_and_put(vm);
+	return ERR_PTR(err);
+
+err_no_resv:
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_fini(&vm->rftree[id]);
+	kfree(vm);
+	if (!(flags & XE_VM_FLAG_MIGRATION))
+		xe_device_mem_access_put(xe);
+	return ERR_PTR(err);
+}
+
+static void xe_vm_close(struct xe_vm *vm)
+{
+	down_write(&vm->lock);
+	vm->size = 0;
+	up_write(&vm->lock);
+}
+
+void xe_vm_close_and_put(struct xe_vm *vm)
+{
+	LIST_HEAD(contested);
+	struct xe_device *xe = vm->xe;
+	struct xe_tile *tile;
+	struct xe_vma *vma, *next_vma;
+	struct drm_gpuva *gpuva, *next;
+	u8 id;
+
+	xe_assert(xe, !vm->preempt.num_exec_queues);
+
+	xe_vm_close(vm);
+	if (xe_vm_in_preempt_fence_mode(vm))
+		flush_work(&vm->preempt.rebind_work);
+
+	down_write(&vm->lock);
+	for_each_tile(tile, xe, id) {
+		if (vm->q[id])
+			xe_exec_queue_last_fence_put(vm->q[id], vm);
+	}
+	up_write(&vm->lock);
+
+	for_each_tile(tile, xe, id) {
+		if (vm->q[id]) {
+			xe_exec_queue_kill(vm->q[id]);
+			xe_exec_queue_put(vm->q[id]);
+			vm->q[id] = NULL;
+		}
+	}
+
+	down_write(&vm->lock);
+	xe_vm_lock(vm, false);
+	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
+		vma = gpuva_to_vma(gpuva);
+
+		if (xe_vma_has_no_bo(vma)) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags |= XE_VMA_DESTROYED;
+			up_read(&vm->userptr.notifier_lock);
+		}
+
+		xe_vm_remove_vma(vm, vma);
+
+		/* easy case, remove from VMA? */
+		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
+			list_del_init(&vma->combined_links.rebind);
+			xe_vma_destroy(vma, NULL);
+			continue;
+		}
+
+		list_move_tail(&vma->combined_links.destroy, &contested);
+		vma->gpuva.flags |= XE_VMA_DESTROYED;
+	}
+
+	/*
+	 * All vm operations will add shared fences to resv.
+	 * The only exception is eviction for a shared object,
+	 * but even so, the unbind when evicted would still
+	 * install a fence to resv. Hence it's safe to
+	 * destroy the pagetables immediately.
+	 */
+	xe_vm_free_scratch(vm);
+
+	for_each_tile(tile, xe, id) {
+		if (vm->pt_root[id]) {
+			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+			vm->pt_root[id] = NULL;
+		}
+	}
+	xe_vm_unlock(vm);
+
+	/*
+	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
+	 * Since we hold a refcount to the bo, we can remove and free
+	 * the members safely without locking.
+	 */
+	list_for_each_entry_safe(vma, next_vma, &contested,
+				 combined_links.destroy) {
+		list_del_init(&vma->combined_links.destroy);
+		xe_vma_destroy_unlocked(vma);
+	}
+
+	up_write(&vm->lock);
+
+	mutex_lock(&xe->usm.lock);
+	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
+		xe->usm.num_vm_in_fault_mode--;
+	else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
+		xe->usm.num_vm_in_non_fault_mode--;
+	mutex_unlock(&xe->usm.lock);
+
+	for_each_tile(tile, xe, id)
+		xe_range_fence_tree_fini(&vm->rftree[id]);
+
+	xe_vm_put(vm);
+}
+
+static void vm_destroy_work_func(struct work_struct *w)
+{
+	struct xe_vm *vm =
+		container_of(w, struct xe_vm, destroy_work);
+	struct xe_device *xe = vm->xe;
+	struct xe_tile *tile;
+	u8 id;
+	void *lookup;
+
+	/* xe_vm_close_and_put was not called? */
+	xe_assert(xe, !vm->size);
+
+	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
+		xe_device_mem_access_put(xe);
+
+		if (xe->info.has_asid && vm->usm.asid) {
+			mutex_lock(&xe->usm.lock);
+			lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+			xe_assert(xe, lookup == vm);
+			mutex_unlock(&xe->usm.lock);
+		}
+	}
+
+	for_each_tile(tile, xe, id)
+		XE_WARN_ON(vm->pt_root[id]);
+
+	trace_xe_vm_free(vm);
+	dma_fence_put(vm->rebind_fence);
+	kfree(vm);
+}
+
+static void xe_vm_free(struct drm_gpuvm *gpuvm)
+{
+	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
+
+	/* To destroy the VM we need to be able to sleep */
+	queue_work(system_unbound_wq, &vm->destroy_work);
+}
+
+struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
+{
+	struct xe_vm *vm;
+
+	mutex_lock(&xef->vm.lock);
+	vm = xa_load(&xef->vm.xa, id);
+	if (vm)
+		xe_vm_get(vm);
+	mutex_unlock(&xef->vm.lock);
+
+	return vm;
+}
+
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
+{
+	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
+					 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
+}
+
+static struct xe_exec_queue *
+to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	return q ? q : vm->q[0];
+}
+
+static struct dma_fence *
+xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
+		 struct xe_sync_entry *syncs, u32 num_syncs,
+		 bool first_op, bool last_op)
+{
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
+	struct xe_tile *tile;
+	struct dma_fence *fence = NULL;
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	int cur_fence = 0, i;
+	int number_tiles = hweight8(vma->tile_present);
+	int err;
+	u8 id;
+
+	trace_xe_vma_unbind(vma);
+
+	if (number_tiles > 1) {
+		fences = kmalloc_array(number_tiles, sizeof(*fences),
+				       GFP_KERNEL);
+		if (!fences)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	for_each_tile(tile, vm->xe, id) {
+		if (!(vma->tile_present & BIT(id)))
+			goto next;
+
+		fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
+					   first_op ? syncs : NULL,
+					   first_op ? num_syncs : 0);
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			goto err_fences;
+		}
+
+		if (fences)
+			fences[cur_fence++] = fence;
+
+next:
+		if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
+			q = list_next_entry(q, multi_gt_list);
+	}
+
+	if (fences) {
+		cf = dma_fence_array_create(number_tiles, fences,
+					    vm->composite_fence_ctx,
+					    vm->composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			--vm->composite_fence_seqno;
+			err = -ENOMEM;
+			goto err_fences;
+		}
+	}
+
+	fence = cf ? &cf->base : !fence ?
+		xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
+	if (last_op) {
+		for (i = 0; i < num_syncs; i++)
+			xe_sync_entry_signal(&syncs[i], NULL, fence);
+	}
+
+	return fence;
+
+err_fences:
+	if (fences) {
+		while (cur_fence)
+			dma_fence_put(fences[--cur_fence]);
+		kfree(fences);
+	}
+
+	return ERR_PTR(err);
+}
+
+static struct dma_fence *
+xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
+	       struct xe_sync_entry *syncs, u32 num_syncs,
+	       bool first_op, bool last_op)
+{
+	struct xe_tile *tile;
+	struct dma_fence *fence;
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
+	struct xe_vm *vm = xe_vma_vm(vma);
+	int cur_fence = 0, i;
+	int number_tiles = hweight8(vma->tile_mask);
+	int err;
+	u8 id;
+
+	trace_xe_vma_bind(vma);
+
+	if (number_tiles > 1) {
+		fences = kmalloc_array(number_tiles, sizeof(*fences),
+				       GFP_KERNEL);
+		if (!fences)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	for_each_tile(tile, vm->xe, id) {
+		if (!(vma->tile_mask & BIT(id)))
+			goto next;
+
+		fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
+					 first_op ? syncs : NULL,
+					 first_op ? num_syncs : 0,
+					 vma->tile_present & BIT(id));
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			goto err_fences;
+		}
+
+		if (fences)
+			fences[cur_fence++] = fence;
+
+next:
+		if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
+			q = list_next_entry(q, multi_gt_list);
+	}
+
+	if (fences) {
+		cf = dma_fence_array_create(number_tiles, fences,
+					    vm->composite_fence_ctx,
+					    vm->composite_fence_seqno++,
+					    false);
+		if (!cf) {
+			--vm->composite_fence_seqno;
+			err = -ENOMEM;
+			goto err_fences;
+		}
+	}
+
+	if (last_op) {
+		for (i = 0; i < num_syncs; i++)
+			xe_sync_entry_signal(&syncs[i], NULL,
+					     cf ? &cf->base : fence);
+	}
+
+	return cf ? &cf->base : fence;
+
+err_fences:
+	if (fences) {
+		while (cur_fence)
+			dma_fence_put(fences[--cur_fence]);
+		kfree(fences);
+	}
+
+	return ERR_PTR(err);
+}
+
+static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
+			struct xe_exec_queue *q, struct xe_sync_entry *syncs,
+			u32 num_syncs, bool immediate, bool first_op,
+			bool last_op)
+{
+	struct dma_fence *fence;
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
+
+	xe_vm_assert_held(vm);
+
+	if (immediate) {
+		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
+				       last_op);
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+	} else {
+		int i;
+
+		xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
+
+		fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
+		if (last_op) {
+			for (i = 0; i < num_syncs; i++)
+				xe_sync_entry_signal(&syncs[i], NULL, fence);
+		}
+	}
+
+	if (last_op)
+		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
+	dma_fence_put(fence);
+
+	return 0;
+}
+
+static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
+		      struct xe_bo *bo, struct xe_sync_entry *syncs,
+		      u32 num_syncs, bool immediate, bool first_op,
+		      bool last_op)
+{
+	int err;
+
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(bo);
+
+	if (bo && immediate) {
+		err = xe_bo_validate(bo, vm, true);
+		if (err)
+			return err;
+	}
+
+	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
+			    last_op);
+}
+
+static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
+			struct xe_exec_queue *q, struct xe_sync_entry *syncs,
+			u32 num_syncs, bool first_op, bool last_op)
+{
+	struct dma_fence *fence;
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
+
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(xe_vma_bo(vma));
+
+	fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
+	if (IS_ERR(fence))
+		return PTR_ERR(fence);
+
+	xe_vma_destroy(vma, fence);
+	if (last_op)
+		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
+	dma_fence_put(fence);
+
+	return 0;
+}
+
+#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
+				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
+				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
+
+int xe_vm_create_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_create *args = data;
+	struct xe_tile *tile;
+	struct xe_vm *vm;
+	u32 id, asid;
+	int err;
+	u32 flags = 0;
+
+	if (XE_IOCTL_DBG(xe, args->extensions))
+		return -EINVAL;
+
+	if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
+		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
+
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
+			 !xe->info.has_usm))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
+			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
+			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
+			 xe_device_in_non_fault_mode(xe)))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
+			 xe_device_in_fault_mode(xe)))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->extensions))
+		return -EINVAL;
+
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
+		flags |= XE_VM_FLAG_SCRATCH_PAGE;
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
+		flags |= XE_VM_FLAG_LR_MODE;
+	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
+		flags |= XE_VM_FLAG_FAULT_MODE;
+
+	vm = xe_vm_create(xe, flags);
+	if (IS_ERR(vm))
+		return PTR_ERR(vm);
+
+	mutex_lock(&xef->vm.lock);
+	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
+	mutex_unlock(&xef->vm.lock);
+	if (err) {
+		xe_vm_close_and_put(vm);
+		return err;
+	}
+
+	if (xe->info.has_asid) {
+		mutex_lock(&xe->usm.lock);
+		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
+				      XA_LIMIT(1, XE_MAX_ASID - 1),
+				      &xe->usm.next_asid, GFP_KERNEL);
+		mutex_unlock(&xe->usm.lock);
+		if (err < 0) {
+			xe_vm_close_and_put(vm);
+			return err;
+		}
+		err = 0;
+		vm->usm.asid = asid;
+	}
+
+	args->vm_id = id;
+	vm->xef = xef;
+
+	/* Record BO memory for VM pagetable created against client */
+	for_each_tile(tile, xe, id)
+		if (vm->pt_root[id])
+			xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
+	/* Warning: Security issue - never enable by default */
+	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
+#endif
+
+	return 0;
+}
+
+int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_destroy *args = data;
+	struct xe_vm *vm;
+	int err = 0;
+
+	if (XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	mutex_lock(&xef->vm.lock);
+	vm = xa_load(&xef->vm.xa, args->vm_id);
+	if (XE_IOCTL_DBG(xe, !vm))
+		err = -ENOENT;
+	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
+		err = -EBUSY;
+	else
+		xa_erase(&xef->vm.xa, args->vm_id);
+	mutex_unlock(&xef->vm.lock);
+
+	if (!err)
+		xe_vm_close_and_put(vm);
+
+	return err;
+}
+
+static const u32 region_to_mem_type[] = {
+	XE_PL_TT,
+	XE_PL_VRAM0,
+	XE_PL_VRAM1,
+};
+
+static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
+			  struct xe_exec_queue *q, u32 region,
+			  struct xe_sync_entry *syncs, u32 num_syncs,
+			  bool first_op, bool last_op)
+{
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
+	int err;
+
+	xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
+
+	if (!xe_vma_has_no_bo(vma)) {
+		err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
+		if (err)
+			return err;
+	}
+
+	if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
+		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
+				  true, first_op, last_op);
+	} else {
+		int i;
+
+		/* Nothing to do, signal fences now */
+		if (last_op) {
+			for (i = 0; i < num_syncs; i++) {
+				struct dma_fence *fence =
+					xe_exec_queue_last_fence_get(wait_exec_queue, vm);
+
+				xe_sync_entry_signal(&syncs[i], NULL, fence);
+			}
+		}
+
+		return 0;
+	}
+}
+
+static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
+			     bool post_commit)
+{
+	down_read(&vm->userptr.notifier_lock);
+	vma->gpuva.flags |= XE_VMA_DESTROYED;
+	up_read(&vm->userptr.notifier_lock);
+	if (post_commit)
+		xe_vm_remove_vma(vm, vma);
+}
+
+#undef ULL
+#define ULL	unsigned long long
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
+{
+	struct xe_vma *vma;
+
+	switch (op->op) {
+	case DRM_GPUVA_OP_MAP:
+		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
+		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		vma = gpuva_to_vma(op->remap.unmap->va);
+		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
+		       op->remap.unmap->keep ? 1 : 0);
+		if (op->remap.prev)
+			vm_dbg(&xe->drm,
+			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
+			       (ULL)op->remap.prev->va.addr,
+			       (ULL)op->remap.prev->va.range);
+		if (op->remap.next)
+			vm_dbg(&xe->drm,
+			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
+			       (ULL)op->remap.next->va.addr,
+			       (ULL)op->remap.next->va.range);
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		vma = gpuva_to_vma(op->unmap.va);
+		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
+		       op->unmap.keep ? 1 : 0);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		vma = gpuva_to_vma(op->prefetch.va);
+		vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
+		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
+		break;
+	default:
+		drm_warn(&xe->drm, "NOT POSSIBLE");
+	}
+}
+#else
+static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
+{
+}
+#endif
+
+/*
+ * Create operations list from IOCTL arguments, setup operations fields so parse
+ * and commit steps are decoupled from IOCTL arguments. This step can fail.
+ */
+static struct drm_gpuva_ops *
+vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
+			 u64 bo_offset_or_userptr, u64 addr, u64 range,
+			 u32 operation, u32 flags,
+			 u32 prefetch_region, u16 pat_index)
+{
+	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
+	struct drm_gpuva_ops *ops;
+	struct drm_gpuva_op *__op;
+	struct xe_vma_op *op;
+	struct drm_gpuvm_bo *vm_bo;
+	int err;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	vm_dbg(&vm->xe->drm,
+	       "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
+	       operation, (ULL)addr, (ULL)range,
+	       (ULL)bo_offset_or_userptr);
+
+	switch (operation) {
+	case DRM_XE_VM_BIND_OP_MAP:
+	case DRM_XE_VM_BIND_OP_MAP_USERPTR:
+		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
+						  obj, bo_offset_or_userptr);
+		break;
+	case DRM_XE_VM_BIND_OP_UNMAP:
+		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
+		break;
+	case DRM_XE_VM_BIND_OP_PREFETCH:
+		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
+		break;
+	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
+		xe_assert(vm->xe, bo);
+
+		err = xe_bo_lock(bo, true);
+		if (err)
+			return ERR_PTR(err);
+
+		vm_bo = drm_gpuvm_bo_find(&vm->gpuvm, obj);
+		if (!vm_bo)
+			break;
+
+		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
+		drm_gpuvm_bo_put(vm_bo);
+		xe_bo_unlock(bo);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+		ops = ERR_PTR(-EINVAL);
+	}
+	if (IS_ERR(ops))
+		return ops;
+
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+	if (operation & FORCE_ASYNC_OP_ERROR) {
+		op = list_first_entry_or_null(&ops->list, struct xe_vma_op,
+					      base.entry);
+		if (op)
+			op->inject_error = true;
+	}
+#endif
+
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+		if (__op->op == DRM_GPUVA_OP_MAP) {
+			op->map.immediate =
+				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
+			op->map.read_only =
+				flags & DRM_XE_VM_BIND_FLAG_READONLY;
+			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+			op->map.pat_index = pat_index;
+		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
+			op->prefetch.region = prefetch_region;
+		}
+
+		print_op(vm->xe, __op);
+	}
+
+	return ops;
+}
+
+static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
+			      u16 pat_index, unsigned int flags)
+{
+	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
+	struct drm_exec exec;
+	struct xe_vma *vma;
+	int err;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	if (bo) {
+		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+		drm_exec_until_all_locked(&exec) {
+			err = 0;
+			if (!bo->vm) {
+				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
+				drm_exec_retry_on_contention(&exec);
+			}
+			if (!err) {
+				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
+				drm_exec_retry_on_contention(&exec);
+			}
+			if (err) {
+				drm_exec_fini(&exec);
+				return ERR_PTR(err);
+			}
+		}
+	}
+	vma = xe_vma_create(vm, bo, op->gem.offset,
+			    op->va.addr, op->va.addr +
+			    op->va.range - 1, pat_index, flags);
+	if (bo)
+		drm_exec_fini(&exec);
+
+	if (xe_vma_is_userptr(vma)) {
+		err = xe_vma_userptr_pin_pages(vma);
+		if (err) {
+			prep_vma_destroy(vm, vma, false);
+			xe_vma_destroy_unlocked(vma);
+			return ERR_PTR(err);
+		}
+	} else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
+		err = add_preempt_fences(vm, bo);
+		if (err) {
+			prep_vma_destroy(vm, vma, false);
+			xe_vma_destroy_unlocked(vma);
+			return ERR_PTR(err);
+		}
+	}
+
+	return vma;
+}
+
+static u64 xe_vma_max_pte_size(struct xe_vma *vma)
+{
+	if (vma->gpuva.flags & XE_VMA_PTE_1G)
+		return SZ_1G;
+	else if (vma->gpuva.flags & XE_VMA_PTE_2M)
+		return SZ_2M;
+
+	return SZ_4K;
+}
+
+static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
+{
+	switch (size) {
+	case SZ_1G:
+		vma->gpuva.flags |= XE_VMA_PTE_1G;
+		break;
+	case SZ_2M:
+		vma->gpuva.flags |= XE_VMA_PTE_2M;
+		break;
+	}
+
+	return SZ_4K;
+}
+
+static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	int err = 0;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		err |= xe_vm_insert_vma(vm, op->map.vma);
+		if (!err)
+			op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	case DRM_GPUVA_OP_REMAP:
+	{
+		u8 tile_present =
+			gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
+
+		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
+				 true);
+		op->flags |= XE_VMA_OP_COMMITTED;
+
+		if (op->remap.prev) {
+			err |= xe_vm_insert_vma(vm, op->remap.prev);
+			if (!err)
+				op->flags |= XE_VMA_OP_PREV_COMMITTED;
+			if (!err && op->remap.skip_prev) {
+				op->remap.prev->tile_present =
+					tile_present;
+				op->remap.prev = NULL;
+			}
+		}
+		if (op->remap.next) {
+			err |= xe_vm_insert_vma(vm, op->remap.next);
+			if (!err)
+				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
+			if (!err && op->remap.skip_next) {
+				op->remap.next->tile_present =
+					tile_present;
+				op->remap.next = NULL;
+			}
+		}
+
+		/* Adjust for partial unbind after removin VMA from VM */
+		if (!err) {
+			op->base.remap.unmap->va->va.addr = op->remap.start;
+			op->base.remap.unmap->va->va.range = op->remap.range;
+		}
+		break;
+	}
+	case DRM_GPUVA_OP_UNMAP:
+		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
+		op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		op->flags |= XE_VMA_OP_COMMITTED;
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+
+	return err;
+}
+
+
+static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
+				   struct drm_gpuva_ops *ops,
+				   struct xe_sync_entry *syncs, u32 num_syncs,
+				   struct list_head *ops_list, bool last)
+{
+	struct xe_vma_op *last_op = NULL;
+	struct drm_gpuva_op *__op;
+	int err = 0;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+		struct xe_vma *vma;
+		bool first = list_empty(ops_list);
+		unsigned int flags = 0;
+
+		INIT_LIST_HEAD(&op->link);
+		list_add_tail(&op->link, ops_list);
+
+		if (first) {
+			op->flags |= XE_VMA_OP_FIRST;
+			op->num_syncs = num_syncs;
+			op->syncs = syncs;
+		}
+
+		op->q = q;
+
+		switch (op->base.op) {
+		case DRM_GPUVA_OP_MAP:
+		{
+			flags |= op->map.read_only ?
+				VMA_CREATE_FLAG_READ_ONLY : 0;
+			flags |= op->map.is_null ?
+				VMA_CREATE_FLAG_IS_NULL : 0;
+
+			vma = new_vma(vm, &op->base.map, op->map.pat_index,
+				      flags);
+			if (IS_ERR(vma))
+				return PTR_ERR(vma);
+
+			op->map.vma = vma;
+			break;
+		}
+		case DRM_GPUVA_OP_REMAP:
+		{
+			struct xe_vma *old =
+				gpuva_to_vma(op->base.remap.unmap->va);
+
+			op->remap.start = xe_vma_start(old);
+			op->remap.range = xe_vma_size(old);
+
+			if (op->base.remap.prev) {
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_READ_ONLY ?
+					VMA_CREATE_FLAG_READ_ONLY : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					DRM_GPUVA_SPARSE ?
+					VMA_CREATE_FLAG_IS_NULL : 0;
+
+				vma = new_vma(vm, op->base.remap.prev,
+					      old->pat_index, flags);
+				if (IS_ERR(vma))
+					return PTR_ERR(vma);
+
+				op->remap.prev = vma;
+
+				/*
+				 * Userptr creates a new SG mapping so
+				 * we must also rebind.
+				 */
+				op->remap.skip_prev = !xe_vma_is_userptr(old) &&
+					IS_ALIGNED(xe_vma_end(vma),
+						   xe_vma_max_pte_size(old));
+				if (op->remap.skip_prev) {
+					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
+					op->remap.range -=
+						xe_vma_end(vma) -
+						xe_vma_start(old);
+					op->remap.start = xe_vma_end(vma);
+				}
+			}
+
+			if (op->base.remap.next) {
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_READ_ONLY ?
+					VMA_CREATE_FLAG_READ_ONLY : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					DRM_GPUVA_SPARSE ?
+					VMA_CREATE_FLAG_IS_NULL : 0;
+
+				vma = new_vma(vm, op->base.remap.next,
+					      old->pat_index, flags);
+				if (IS_ERR(vma))
+					return PTR_ERR(vma);
+
+				op->remap.next = vma;
+
+				/*
+				 * Userptr creates a new SG mapping so
+				 * we must also rebind.
+				 */
+				op->remap.skip_next = !xe_vma_is_userptr(old) &&
+					IS_ALIGNED(xe_vma_start(vma),
+						   xe_vma_max_pte_size(old));
+				if (op->remap.skip_next) {
+					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
+					op->remap.range -=
+						xe_vma_end(old) -
+						xe_vma_start(vma);
+				}
+			}
+			break;
+		}
+		case DRM_GPUVA_OP_UNMAP:
+		case DRM_GPUVA_OP_PREFETCH:
+			/* Nothing to do */
+			break;
+		default:
+			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+		}
+
+		last_op = op;
+
+		err = xe_vma_op_commit(vm, op);
+		if (err)
+			return err;
+	}
+
+	/* FIXME: Unhandled corner case */
+	XE_WARN_ON(!last_op && last && !list_empty(ops_list));
+
+	if (!last_op)
+		return 0;
+
+	last_op->ops = ops;
+	if (last) {
+		last_op->flags |= XE_VMA_OP_LAST;
+		last_op->num_syncs = num_syncs;
+		last_op->syncs = syncs;
+	}
+
+	return 0;
+}
+
+static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
+		      struct xe_vma *vma, struct xe_vma_op *op)
+{
+	int err;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	err = xe_vm_prepare_vma(exec, vma, 1);
+	if (err)
+		return err;
+
+	xe_vm_assert_held(vm);
+	xe_bo_assert_held(xe_vma_bo(vma));
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
+				 op->syncs, op->num_syncs,
+				 op->map.immediate || !xe_vm_in_fault_mode(vm),
+				 op->flags & XE_VMA_OP_FIRST,
+				 op->flags & XE_VMA_OP_LAST);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+	{
+		bool prev = !!op->remap.prev;
+		bool next = !!op->remap.next;
+
+		if (!op->remap.unmap_done) {
+			if (prev || next)
+				vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
+			err = xe_vm_unbind(vm, vma, op->q, op->syncs,
+					   op->num_syncs,
+					   op->flags & XE_VMA_OP_FIRST,
+					   op->flags & XE_VMA_OP_LAST &&
+					   !prev && !next);
+			if (err)
+				break;
+			op->remap.unmap_done = true;
+		}
+
+		if (prev) {
+			op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
+			err = xe_vm_bind(vm, op->remap.prev, op->q,
+					 xe_vma_bo(op->remap.prev), op->syncs,
+					 op->num_syncs, true, false,
+					 op->flags & XE_VMA_OP_LAST && !next);
+			op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
+			if (err)
+				break;
+			op->remap.prev = NULL;
+		}
+
+		if (next) {
+			op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
+			err = xe_vm_bind(vm, op->remap.next, op->q,
+					 xe_vma_bo(op->remap.next),
+					 op->syncs, op->num_syncs,
+					 true, false,
+					 op->flags & XE_VMA_OP_LAST);
+			op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
+			if (err)
+				break;
+			op->remap.next = NULL;
+		}
+
+		break;
+	}
+	case DRM_GPUVA_OP_UNMAP:
+		err = xe_vm_unbind(vm, vma, op->q, op->syncs,
+				   op->num_syncs, op->flags & XE_VMA_OP_FIRST,
+				   op->flags & XE_VMA_OP_LAST);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
+				     op->syncs, op->num_syncs,
+				     op->flags & XE_VMA_OP_FIRST,
+				     op->flags & XE_VMA_OP_LAST);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+
+	if (err)
+		trace_xe_vma_fail(vma);
+
+	return err;
+}
+
+static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
+			       struct xe_vma_op *op)
+{
+	struct drm_exec exec;
+	int err;
+
+retry_userptr:
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+	drm_exec_until_all_locked(&exec) {
+		err = op_execute(&exec, vm, vma, op);
+		drm_exec_retry_on_contention(&exec);
+		if (err)
+			break;
+	}
+	drm_exec_fini(&exec);
+
+	if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
+		lockdep_assert_held_write(&vm->lock);
+		err = xe_vma_userptr_pin_pages(vma);
+		if (!err)
+			goto retry_userptr;
+
+		trace_xe_vma_fail(vma);
+	}
+
+	return err;
+}
+
+static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	int ret = 0;
+
+	lockdep_assert_held_write(&vm->lock);
+
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+	if (op->inject_error) {
+		op->inject_error = false;
+		return -ENOMEM;
+	}
+#endif
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		ret = __xe_vma_op_execute(vm, op->map.vma, op);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+	{
+		struct xe_vma *vma;
+
+		if (!op->remap.unmap_done)
+			vma = gpuva_to_vma(op->base.remap.unmap->va);
+		else if (op->remap.prev)
+			vma = op->remap.prev;
+		else
+			vma = op->remap.next;
+
+		ret = __xe_vma_op_execute(vm, vma, op);
+		break;
+	}
+	case DRM_GPUVA_OP_UNMAP:
+		ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
+					  op);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		ret = __xe_vma_op_execute(vm,
+					  gpuva_to_vma(op->base.prefetch.va),
+					  op);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+
+	return ret;
+}
+
+static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	bool last = op->flags & XE_VMA_OP_LAST;
+
+	if (last) {
+		while (op->num_syncs--)
+			xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
+		kfree(op->syncs);
+		if (op->q)
+			xe_exec_queue_put(op->q);
+	}
+	if (!list_empty(&op->link))
+		list_del(&op->link);
+	if (op->ops)
+		drm_gpuva_ops_free(&vm->gpuvm, op->ops);
+	if (last)
+		xe_vm_put(vm);
+}
+
+static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
+			     bool post_commit, bool prev_post_commit,
+			     bool next_post_commit)
+{
+	lockdep_assert_held_write(&vm->lock);
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		if (op->map.vma) {
+			prep_vma_destroy(vm, op->map.vma, post_commit);
+			xe_vma_destroy_unlocked(op->map.vma);
+		}
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
+
+		if (vma) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
+			up_read(&vm->userptr.notifier_lock);
+			if (post_commit)
+				xe_vm_insert_vma(vm, vma);
+		}
+		break;
+	}
+	case DRM_GPUVA_OP_REMAP:
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
+
+		if (op->remap.prev) {
+			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
+			xe_vma_destroy_unlocked(op->remap.prev);
+		}
+		if (op->remap.next) {
+			prep_vma_destroy(vm, op->remap.next, next_post_commit);
+			xe_vma_destroy_unlocked(op->remap.next);
+		}
+		if (vma) {
+			down_read(&vm->userptr.notifier_lock);
+			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
+			up_read(&vm->userptr.notifier_lock);
+			if (post_commit)
+				xe_vm_insert_vma(vm, vma);
+		}
+		break;
+	}
+	case DRM_GPUVA_OP_PREFETCH:
+		/* Nothing to do */
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+}
+
+static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
+				     struct drm_gpuva_ops **ops,
+				     int num_ops_list)
+{
+	int i;
+
+	for (i = num_ops_list - 1; i; ++i) {
+		struct drm_gpuva_ops *__ops = ops[i];
+		struct drm_gpuva_op *__op;
+
+		if (!__ops)
+			continue;
+
+		drm_gpuva_for_each_op_reverse(__op, __ops) {
+			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+			xe_vma_op_unwind(vm, op,
+					 op->flags & XE_VMA_OP_COMMITTED,
+					 op->flags & XE_VMA_OP_PREV_COMMITTED,
+					 op->flags & XE_VMA_OP_NEXT_COMMITTED);
+		}
+
+		drm_gpuva_ops_free(&vm->gpuvm, __ops);
+	}
+}
+
+static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
+				     struct list_head *ops_list)
+{
+	struct xe_vma_op *op, *next;
+	int err;
+
+	lockdep_assert_held_write(&vm->lock);
+
+	list_for_each_entry_safe(op, next, ops_list, link) {
+		err = xe_vma_op_execute(vm, op);
+		if (err) {
+			drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
+				 op->base.op, err);
+			/*
+			 * FIXME: Killing VM rather than proper error handling
+			 */
+			xe_vm_kill(vm);
+			return -ENOSPC;
+		}
+		xe_vma_op_cleanup(vm, op);
+	}
+
+	return 0;
+}
+
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+#define SUPPORTED_FLAGS	\
+	(FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \
+	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
+#else
+#define SUPPORTED_FLAGS	\
+	(DRM_XE_VM_BIND_FLAG_READONLY | \
+	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \
+	 0xffff)
+#endif
+#define XE_64K_PAGE_MASK 0xffffull
+#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
+
+#define MAX_BINDS	512	/* FIXME: Picking random upper limit */
+
+static int vm_bind_ioctl_check_args(struct xe_device *xe,
+				    struct drm_xe_vm_bind *args,
+				    struct drm_xe_vm_bind_op **bind_ops)
+{
+	int err;
+	int i;
+
+	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS))
+		return -EINVAL;
+
+	if (args->num_binds > 1) {
+		u64 __user *bind_user =
+			u64_to_user_ptr(args->vector_of_binds);
+
+		*bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) *
+				    args->num_binds, GFP_KERNEL);
+		if (!*bind_ops)
+			return -ENOMEM;
+
+		err = __copy_from_user(*bind_ops, bind_user,
+				       sizeof(struct drm_xe_vm_bind_op) *
+				       args->num_binds);
+		if (XE_IOCTL_DBG(xe, err)) {
+			err = -EFAULT;
+			goto free_bind_ops;
+		}
+	} else {
+		*bind_ops = &args->bind;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = (*bind_ops)[i].range;
+		u64 addr = (*bind_ops)[i].addr;
+		u32 op = (*bind_ops)[i].op;
+		u32 flags = (*bind_ops)[i].flags;
+		u32 obj = (*bind_ops)[i].obj;
+		u64 obj_offset = (*bind_ops)[i].obj_offset;
+		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
+		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+		u16 pat_index = (*bind_ops)[i].pat_index;
+		u16 coh_mode;
+
+		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
+		(*bind_ops)[i].pat_index = pat_index;
+		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
+		    XE_IOCTL_DBG(xe, obj && is_null) ||
+		    XE_IOCTL_DBG(xe, obj_offset && is_null) ||
+		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
+				 is_null) ||
+		    XE_IOCTL_DBG(xe, !obj &&
+				 op == DRM_XE_VM_BIND_OP_MAP &&
+				 !is_null) ||
+		    XE_IOCTL_DBG(xe, !obj &&
+				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_DBG(xe, addr &&
+				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_DBG(xe, range &&
+				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
+		    XE_IOCTL_DBG(xe, obj &&
+				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+		    XE_IOCTL_DBG(xe, obj &&
+				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_DBG(xe, prefetch_region &&
+				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
+		    XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
+				       xe->info.mem_region_mask)) ||
+		    XE_IOCTL_DBG(xe, obj &&
+				 op == DRM_XE_VM_BIND_OP_UNMAP)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+
+		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
+		    XE_IOCTL_DBG(xe, !range &&
+				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
+			err = -EINVAL;
+			goto free_bind_ops;
+		}
+	}
+
+	return 0;
+
+free_bind_ops:
+	if (args->num_binds > 1)
+		kfree(*bind_ops);
+	return err;
+}
+
+static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
+				       struct xe_exec_queue *q,
+				       struct xe_sync_entry *syncs,
+				       int num_syncs)
+{
+	struct dma_fence *fence;
+	int i, err = 0;
+
+	fence = xe_sync_in_fence_get(syncs, num_syncs,
+				     to_wait_exec_queue(vm, q), vm);
+	if (IS_ERR(fence))
+		return PTR_ERR(fence);
+
+	for (i = 0; i < num_syncs; i++)
+		xe_sync_entry_signal(&syncs[i], NULL, fence);
+
+	xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
+				     fence);
+	dma_fence_put(fence);
+
+	return err;
+}
+
+int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_vm_bind *args = data;
+	struct drm_xe_sync __user *syncs_user;
+	struct xe_bo **bos = NULL;
+	struct drm_gpuva_ops **ops = NULL;
+	struct xe_vm *vm;
+	struct xe_exec_queue *q = NULL;
+	u32 num_syncs;
+	struct xe_sync_entry *syncs = NULL;
+	struct drm_xe_vm_bind_op *bind_ops;
+	LIST_HEAD(ops_list);
+	int err;
+	int i;
+
+	err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
+	if (err)
+		return err;
+
+	if (args->exec_queue_id) {
+		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+		if (XE_IOCTL_DBG(xe, !q)) {
+			err = -ENOENT;
+			goto free_objs;
+		}
+
+		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
+			err = -EINVAL;
+			goto put_exec_queue;
+		}
+	}
+
+	vm = xe_vm_lookup(xef, args->vm_id);
+	if (XE_IOCTL_DBG(xe, !vm)) {
+		err = -EINVAL;
+		goto put_exec_queue;
+	}
+
+	err = down_write_killable(&vm->lock);
+	if (err)
+		goto put_vm;
+
+	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+		err = -ENOENT;
+		goto release_vm_lock;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+
+		if (XE_IOCTL_DBG(xe, range > vm->size) ||
+		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
+			err = -EINVAL;
+			goto release_vm_lock;
+		}
+	}
+
+	if (args->num_binds) {
+		bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL);
+		if (!bos) {
+			err = -ENOMEM;
+			goto release_vm_lock;
+		}
+
+		ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL);
+		if (!ops) {
+			err = -ENOMEM;
+			goto release_vm_lock;
+		}
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		struct drm_gem_object *gem_obj;
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 obj = bind_ops[i].obj;
+		u64 obj_offset = bind_ops[i].obj_offset;
+		u16 pat_index = bind_ops[i].pat_index;
+		u16 coh_mode;
+
+		if (!obj)
+			continue;
+
+		gem_obj = drm_gem_object_lookup(file, obj);
+		if (XE_IOCTL_DBG(xe, !gem_obj)) {
+			err = -ENOENT;
+			goto put_obj;
+		}
+		bos[i] = gem_to_xe_bo(gem_obj);
+
+		if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
+		    XE_IOCTL_DBG(xe, obj_offset >
+				 bos[i]->size - range)) {
+			err = -EINVAL;
+			goto put_obj;
+		}
+
+		if (bos[i]->flags & XE_BO_INTERNAL_64K) {
+			if (XE_IOCTL_DBG(xe, obj_offset &
+					 XE_64K_PAGE_MASK) ||
+			    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
+			    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
+				err = -EINVAL;
+				goto put_obj;
+			}
+		}
+
+		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+		if (bos[i]->cpu_caching) {
+			if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+					 bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
+				err = -EINVAL;
+				goto put_obj;
+			}
+		} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
+			/*
+			 * Imported dma-buf from a different device should
+			 * require 1way or 2way coherency since we don't know
+			 * how it was mapped on the CPU. Just assume is it
+			 * potentially cached on CPU side.
+			 */
+			err = -EINVAL;
+			goto put_obj;
+		}
+	}
+
+	if (args->num_syncs) {
+		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
+		if (!syncs) {
+			err = -ENOMEM;
+			goto put_obj;
+		}
+	}
+
+	syncs_user = u64_to_user_ptr(args->syncs);
+	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
+		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
+					  &syncs_user[num_syncs],
+					  (xe_vm_in_lr_mode(vm) ?
+					   SYNC_PARSE_FLAG_LR_MODE : 0) |
+					  (!args->num_binds ?
+					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
+		if (err)
+			goto free_syncs;
+	}
+
+	if (!args->num_binds) {
+		err = -ENODATA;
+		goto free_syncs;
+	}
+
+	for (i = 0; i < args->num_binds; ++i) {
+		u64 range = bind_ops[i].range;
+		u64 addr = bind_ops[i].addr;
+		u32 op = bind_ops[i].op;
+		u32 flags = bind_ops[i].flags;
+		u64 obj_offset = bind_ops[i].obj_offset;
+		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
+		u16 pat_index = bind_ops[i].pat_index;
+
+		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
+						  addr, range, op, flags,
+						  prefetch_region, pat_index);
+		if (IS_ERR(ops[i])) {
+			err = PTR_ERR(ops[i]);
+			ops[i] = NULL;
+			goto unwind_ops;
+		}
+
+		err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
+					      &ops_list,
+					      i == args->num_binds - 1);
+		if (err)
+			goto unwind_ops;
+	}
+
+	/* Nothing to do */
+	if (list_empty(&ops_list)) {
+		err = -ENODATA;
+		goto unwind_ops;
+	}
+
+	xe_vm_get(vm);
+	if (q)
+		xe_exec_queue_get(q);
+
+	err = vm_bind_ioctl_ops_execute(vm, &ops_list);
+
+	up_write(&vm->lock);
+
+	if (q)
+		xe_exec_queue_put(q);
+	xe_vm_put(vm);
+
+	for (i = 0; bos && i < args->num_binds; ++i)
+		xe_bo_put(bos[i]);
+
+	kfree(bos);
+	kfree(ops);
+	if (args->num_binds > 1)
+		kfree(bind_ops);
+
+	return err;
+
+unwind_ops:
+	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
+free_syncs:
+	if (err == -ENODATA)
+		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
+	while (num_syncs--)
+		xe_sync_entry_cleanup(&syncs[num_syncs]);
+
+	kfree(syncs);
+put_obj:
+	for (i = 0; i < args->num_binds; ++i)
+		xe_bo_put(bos[i]);
+release_vm_lock:
+	up_write(&vm->lock);
+put_vm:
+	xe_vm_put(vm);
+put_exec_queue:
+	if (q)
+		xe_exec_queue_put(q);
+free_objs:
+	kfree(bos);
+	kfree(ops);
+	if (args->num_binds > 1)
+		kfree(bind_ops);
+	return err;
+}
+
+/**
+ * xe_vm_lock() - Lock the vm's dma_resv object
+ * @vm: The struct xe_vm whose lock is to be locked
+ * @intr: Whether to perform any wait interruptible
+ *
+ * Return: 0 on success, -EINTR if @intr is true and the wait for a
+ * contended lock was interrupted. If @intr is false, the function
+ * always returns 0.
+ */
+int xe_vm_lock(struct xe_vm *vm, bool intr)
+{
+	if (intr)
+		return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
+
+	return dma_resv_lock(xe_vm_resv(vm), NULL);
+}
+
+/**
+ * xe_vm_unlock() - Unlock the vm's dma_resv object
+ * @vm: The struct xe_vm whose lock is to be released.
+ *
+ * Unlock a buffer object lock that was locked by xe_vm_lock().
+ */
+void xe_vm_unlock(struct xe_vm *vm)
+{
+	dma_resv_unlock(xe_vm_resv(vm));
+}
+
+/**
+ * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
+ * @vma: VMA to invalidate
+ *
+ * Walks a list of page tables leaves which it memset the entries owned by this
+ * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is
+ * complete.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_vm_invalidate_vma(struct xe_vma *vma)
+{
+	struct xe_device *xe = xe_vma_vm(vma)->xe;
+	struct xe_tile *tile;
+	u32 tile_needs_invalidate = 0;
+	int seqno[XE_MAX_TILES_PER_DEVICE];
+	u8 id;
+	int ret;
+
+	xe_assert(xe, xe_vm_in_fault_mode(xe_vma_vm(vma)));
+	xe_assert(xe, !xe_vma_is_null(vma));
+	trace_xe_vma_usm_invalidate(vma);
+
+	/* Check that we don't race with page-table updates */
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+		if (xe_vma_is_userptr(vma)) {
+			WARN_ON_ONCE(!mmu_interval_check_retry
+				     (&vma->userptr.notifier,
+				      vma->userptr.notifier_seq));
+			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
+							     DMA_RESV_USAGE_BOOKKEEP));
+
+		} else {
+			xe_bo_assert_held(xe_vma_bo(vma));
+		}
+	}
+
+	for_each_tile(tile, xe, id) {
+		if (xe_pt_zap_ptes(tile, vma)) {
+			tile_needs_invalidate |= BIT(id);
+			xe_device_wmb(xe);
+			/*
+			 * FIXME: We potentially need to invalidate multiple
+			 * GTs within the tile
+			 */
+			seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
+			if (seqno[id] < 0)
+				return seqno[id];
+		}
+	}
+
+	for_each_tile(tile, xe, id) {
+		if (tile_needs_invalidate & BIT(id)) {
+			ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	vma->usm.tile_invalidated = vma->tile_mask;
+
+	return 0;
+}
+
+int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
+{
+	struct drm_gpuva *gpuva;
+	bool is_vram;
+	uint64_t addr;
+
+	if (!down_read_trylock(&vm->lock)) {
+		drm_printf(p, " Failed to acquire VM lock to dump capture");
+		return 0;
+	}
+	if (vm->pt_root[gt_id]) {
+		addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE);
+		is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo);
+		drm_printf(p, " VM root: A:0x%llx %s\n", addr,
+			   is_vram ? "VRAM" : "SYS");
+	}
+
+	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
+		struct xe_vma *vma = gpuva_to_vma(gpuva);
+		bool is_userptr = xe_vma_is_userptr(vma);
+		bool is_null = xe_vma_is_null(vma);
+
+		if (is_null) {
+			addr = 0;
+		} else if (is_userptr) {
+			struct xe_res_cursor cur;
+
+			if (vma->userptr.sg) {
+				xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE,
+						&cur);
+				addr = xe_res_dma(&cur);
+			} else {
+				addr = 0;
+			}
+		} else {
+			addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE);
+			is_vram = xe_bo_is_vram(xe_vma_bo(vma));
+		}
+		drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
+			   xe_vma_start(vma), xe_vma_end(vma) - 1,
+			   xe_vma_size(vma),
+			   addr, is_null ? "NULL" : is_userptr ? "USR" :
+			   is_vram ? "VRAM" : "SYS");
+	}
+	up_read(&vm->lock);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
new file mode 100644
index 000000000000..cf2f96e8c1ab
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -0,0 +1,263 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_VM_H_
+#define _XE_VM_H_
+
+#include "xe_bo_types.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_vm_types.h"
+
+struct drm_device;
+struct drm_printer;
+struct drm_file;
+
+struct ttm_buffer_object;
+struct ttm_validate_buffer;
+
+struct xe_exec_queue;
+struct xe_file;
+struct xe_sync_entry;
+struct drm_exec;
+
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags);
+
+struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id);
+int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node);
+
+static inline struct xe_vm *xe_vm_get(struct xe_vm *vm)
+{
+	drm_gpuvm_get(&vm->gpuvm);
+	return vm;
+}
+
+static inline void xe_vm_put(struct xe_vm *vm)
+{
+	drm_gpuvm_put(&vm->gpuvm);
+}
+
+int xe_vm_lock(struct xe_vm *vm, bool intr);
+
+void xe_vm_unlock(struct xe_vm *vm);
+
+static inline bool xe_vm_is_closed(struct xe_vm *vm)
+{
+	/* Only guaranteed not to change when vm->lock is held */
+	return !vm->size;
+}
+
+static inline bool xe_vm_is_banned(struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_BANNED;
+}
+
+static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm)
+{
+	lockdep_assert_held(&vm->lock);
+	return xe_vm_is_closed(vm) || xe_vm_is_banned(vm);
+}
+
+struct xe_vma *
+xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range);
+
+/**
+ * xe_vm_has_scratch() - Whether the vm is configured for scratch PTEs
+ * @vm: The vm
+ *
+ * Return: whether the vm populates unmapped areas with scratch PTEs
+ */
+static inline bool xe_vm_has_scratch(const struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_SCRATCH_PAGE;
+}
+
+/**
+ * gpuvm_to_vm() - Return the embedding xe_vm from a struct drm_gpuvm pointer
+ * @gpuvm: The struct drm_gpuvm pointer
+ *
+ * Return: Pointer to the embedding struct xe_vm.
+ */
+static inline struct xe_vm *gpuvm_to_vm(struct drm_gpuvm *gpuvm)
+{
+	return container_of(gpuvm, struct xe_vm, gpuvm);
+}
+
+static inline struct xe_vm *gpuva_to_vm(struct drm_gpuva *gpuva)
+{
+	return gpuvm_to_vm(gpuva->vm);
+}
+
+static inline struct xe_vma *gpuva_to_vma(struct drm_gpuva *gpuva)
+{
+	return container_of(gpuva, struct xe_vma, gpuva);
+}
+
+static inline struct xe_vma_op *gpuva_op_to_vma_op(struct drm_gpuva_op *op)
+{
+	return container_of(op, struct xe_vma_op, base);
+}
+
+/**
+ * DOC: Provide accessors for vma members to facilitate easy change of
+ * implementation.
+ */
+static inline u64 xe_vma_start(struct xe_vma *vma)
+{
+	return vma->gpuva.va.addr;
+}
+
+static inline u64 xe_vma_size(struct xe_vma *vma)
+{
+	return vma->gpuva.va.range;
+}
+
+static inline u64 xe_vma_end(struct xe_vma *vma)
+{
+	return xe_vma_start(vma) + xe_vma_size(vma);
+}
+
+static inline u64 xe_vma_bo_offset(struct xe_vma *vma)
+{
+	return vma->gpuva.gem.offset;
+}
+
+static inline struct xe_bo *xe_vma_bo(struct xe_vma *vma)
+{
+	return !vma->gpuva.gem.obj ? NULL :
+		container_of(vma->gpuva.gem.obj, struct xe_bo, ttm.base);
+}
+
+static inline struct xe_vm *xe_vma_vm(struct xe_vma *vma)
+{
+	return container_of(vma->gpuva.vm, struct xe_vm, gpuvm);
+}
+
+static inline bool xe_vma_read_only(struct xe_vma *vma)
+{
+	return vma->gpuva.flags & XE_VMA_READ_ONLY;
+}
+
+static inline u64 xe_vma_userptr(struct xe_vma *vma)
+{
+	return vma->gpuva.gem.offset;
+}
+
+static inline bool xe_vma_is_null(struct xe_vma *vma)
+{
+	return vma->gpuva.flags & DRM_GPUVA_SPARSE;
+}
+
+static inline bool xe_vma_has_no_bo(struct xe_vma *vma)
+{
+	return !xe_vma_bo(vma);
+}
+
+static inline bool xe_vma_is_userptr(struct xe_vma *vma)
+{
+	return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma);
+}
+
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile);
+
+int xe_vm_create_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file);
+int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int xe_vm_bind_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file);
+
+void xe_vm_close_and_put(struct xe_vm *vm);
+
+static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_FAULT_MODE;
+}
+
+static inline bool xe_vm_in_lr_mode(struct xe_vm *vm)
+{
+	return vm->flags & XE_VM_FLAG_LR_MODE;
+}
+
+static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm)
+{
+	return xe_vm_in_lr_mode(vm) && !xe_vm_in_fault_mode(vm);
+}
+
+int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
+void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
+
+int xe_vm_userptr_pin(struct xe_vm *vm);
+
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
+
+int xe_vm_userptr_check_repin(struct xe_vm *vm);
+
+struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
+
+int xe_vm_invalidate_vma(struct xe_vma *vma);
+
+extern struct ttm_device_funcs xe_ttm_funcs;
+
+static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
+{
+	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
+	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
+}
+
+/**
+ * xe_vm_reactivate_rebind() - Reactivate the rebind functionality on compute
+ * vms.
+ * @vm: The vm.
+ *
+ * If the rebind functionality on a compute vm was disabled due
+ * to nothing to execute. Reactivate it and run the rebind worker.
+ * This function should be called after submitting a batch to a compute vm.
+ */
+static inline void xe_vm_reactivate_rebind(struct xe_vm *vm)
+{
+	if (xe_vm_in_preempt_fence_mode(vm) && vm->preempt.rebind_deactivated) {
+		vm->preempt.rebind_deactivated = false;
+		xe_vm_queue_rebind_worker(vm);
+	}
+}
+
+int xe_vma_userptr_pin_pages(struct xe_vma *vma);
+
+int xe_vma_userptr_check_repin(struct xe_vma *vma);
+
+bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
+
+int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
+
+int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
+		      unsigned int num_shared);
+
+/**
+ * xe_vm_resv() - Return's the vm's reservation object
+ * @vm: The vm
+ *
+ * Return: Pointer to the vm's reservation object.
+ */
+static inline struct dma_resv *xe_vm_resv(struct xe_vm *vm)
+{
+	return drm_gpuvm_resv(&vm->gpuvm);
+}
+
+/**
+ * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held.
+ * @vm: The vm
+ */
+#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+#define vm_dbg drm_dbg
+#else
+__printf(2, 3)
+static inline void vm_dbg(const struct drm_device *dev,
+			  const char *format, ...)
+{ /* noop */ }
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
new file mode 100644
index 000000000000..bdc6659891a5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -0,0 +1,555 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_VM_DOC_H_
+#define _XE_VM_DOC_H_
+
+/**
+ * DOC: XE VM (user address space)
+ *
+ * VM creation
+ * ===========
+ *
+ * Allocate a physical page for root of the page table structure, create default
+ * bind engine, and return a handle to the user.
+ *
+ * Scratch page
+ * ------------
+ *
+ * If the VM is created with the flag, DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE, set the
+ * entire page table structure defaults pointing to blank page allocated by the
+ * VM. Invalid memory access rather than fault just read / write to this page.
+ *
+ * VM bind (create GPU mapping for a BO or userptr)
+ * ================================================
+ *
+ * Creates GPU mapings for a BO or userptr within a VM. VM binds uses the same
+ * in / out fence interface (struct drm_xe_sync) as execs which allows users to
+ * think of binds and execs as more or less the same operation.
+ *
+ * Operations
+ * ----------
+ *
+ * DRM_XE_VM_BIND_OP_MAP		- Create mapping for a BO
+ * DRM_XE_VM_BIND_OP_UNMAP		- Destroy mapping for a BO / userptr
+ * DRM_XE_VM_BIND_OP_MAP_USERPTR	- Create mapping for userptr
+ *
+ * Implementation details
+ * ~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * All bind operations are implemented via a hybrid approach of using the CPU
+ * and GPU to modify page tables. If a new physical page is allocated in the
+ * page table structure we populate that page via the CPU and insert that new
+ * page into the existing page table structure via a GPU job. Also any existing
+ * pages in the page table structure that need to be modified also are updated
+ * via the GPU job. As the root physical page is prealloced on VM creation our
+ * GPU job will always have at least 1 update. The in / out fences are passed to
+ * this job so again this is conceptually the same as an exec.
+ *
+ * Very simple example of few binds on an empty VM with 48 bits of address space
+ * and the resulting operations:
+ *
+ * .. code-block::
+ *
+ *	bind BO0 0x0-0x1000
+ *	alloc page level 3a, program PTE[0] to BO0 phys address (CPU)
+ *	alloc page level 2, program PDE[0] page level 3a phys address (CPU)
+ *	alloc page level 1, program PDE[0] page level 2 phys address (CPU)
+ *	update root PDE[0] to page level 1 phys address (GPU)
+ *
+ *	bind BO1 0x201000-0x202000
+ *	alloc page level 3b, program PTE[1] to BO1 phys address (CPU)
+ *	update page level 2 PDE[1] to page level 3b phys address (GPU)
+ *
+ *	bind BO2 0x1ff000-0x201000
+ *	update page level 3a PTE[511] to BO2 phys addres (GPU)
+ *	update page level 3b PTE[0] to BO2 phys addres + 0x1000 (GPU)
+ *
+ * GPU bypass
+ * ~~~~~~~~~~
+ *
+ * In the above example the steps using the GPU can be converted to CPU if the
+ * bind can be done immediately (all in-fences satisfied, VM dma-resv kernel
+ * slot is idle).
+ *
+ * Address space
+ * -------------
+ *
+ * Depending on platform either 48 or 57 bits of address space is supported.
+ *
+ * Page sizes
+ * ----------
+ *
+ * The minimum page size is either 4k or 64k depending on platform and memory
+ * placement (sysmem vs. VRAM). We enforce that binds must be aligned to the
+ * minimum page size.
+ *
+ * Larger pages (2M or 1GB) can be used for BOs in VRAM, the BO physical address
+ * is aligned to the larger pages size, and VA is aligned to the larger page
+ * size. Larger pages for userptrs / BOs in sysmem should be possible but is not
+ * yet implemented.
+ *
+ * Sync error handling mode
+ * ------------------------
+ *
+ * In both modes during the bind IOCTL the user input is validated. In sync
+ * error handling mode the newly bound BO is validated (potentially moved back
+ * to a region of memory where is can be used), page tables are updated by the
+ * CPU and the job to do the GPU binds is created in the IOCTL itself. This step
+ * can fail due to memory pressure. The user can recover by freeing memory and
+ * trying this operation again.
+ *
+ * Async error handling mode
+ * -------------------------
+ *
+ * In async error handling the step of validating the BO, updating page tables,
+ * and generating a job are deferred to an async worker. As this step can now
+ * fail after the IOCTL has reported success we need an error handling flow for
+ * which the user can recover from.
+ *
+ * The solution is for a user to register a user address with the VM which the
+ * VM uses to report errors to. The ufence wait interface can be used to wait on
+ * a VM going into an error state. Once an error is reported the VM's async
+ * worker is paused. While the VM's async worker is paused sync,
+ * DRM_XE_VM_BIND_OP_UNMAP operations are allowed (this can free memory). Once the
+ * uses believe the error state is fixed, the async worker can be resumed via
+ * XE_VM_BIND_OP_RESTART operation. When VM async bind work is restarted, the
+ * first operation processed is the operation that caused the original error.
+ *
+ * Bind queues / engines
+ * ---------------------
+ *
+ * Think of the case where we have two bind operations A + B and are submitted
+ * in that order. A has in fences while B has none. If using a single bind
+ * queue, B is now blocked on A's in fences even though it is ready to run. This
+ * example is a real use case for VK sparse binding. We work around this
+ * limitation by implementing bind engines.
+ *
+ * In the bind IOCTL the user can optionally pass in an engine ID which must map
+ * to an engine which is of the special class DRM_XE_ENGINE_CLASS_VM_BIND.
+ * Underneath this is a really virtual engine that can run on any of the copy
+ * hardware engines. The job(s) created each IOCTL are inserted into this
+ * engine's ring. In the example above if A and B have different bind engines B
+ * is free to pass A. If the engine ID field is omitted, the default bind queue
+ * for the VM is used.
+ *
+ * TODO: Explain race in issue 41 and how we solve it
+ *
+ * Array of bind operations
+ * ------------------------
+ *
+ * The uAPI allows multiple binds operations to be passed in via a user array,
+ * of struct drm_xe_vm_bind_op, in a single VM bind IOCTL. This interface
+ * matches the VK sparse binding API. The implementation is rather simple, parse
+ * the array into a list of operations, pass the in fences to the first operation,
+ * and pass the out fences to the last operation. The ordered nature of a bind
+ * engine makes this possible.
+ *
+ * Munmap semantics for unbinds
+ * ----------------------------
+ *
+ * Munmap allows things like:
+ *
+ * .. code-block::
+ *
+ *	0x0000-0x2000 and 0x3000-0x5000 have mappings
+ *	Munmap 0x1000-0x4000, results in mappings 0x0000-0x1000 and 0x4000-0x5000
+ *
+ * To support this semantic in the above example we decompose the above example
+ * into 4 operations:
+ *
+ * .. code-block::
+ *
+ *	unbind 0x0000-0x2000
+ *	unbind 0x3000-0x5000
+ *	rebind 0x0000-0x1000
+ *	rebind 0x4000-0x5000
+ *
+ * Why not just do a partial unbind of 0x1000-0x2000 and 0x3000-0x4000? This
+ * falls apart when using large pages at the edges and the unbind forces us to
+ * use a smaller page size. For simplity we always issue a set of unbinds
+ * unmapping anything in the range and at most 2 rebinds on the edges.
+ *
+ * Similar to an array of binds, in fences are passed to the first operation and
+ * out fences are signaled on the last operation.
+ *
+ * In this example there is a window of time where 0x0000-0x1000 and
+ * 0x4000-0x5000 are invalid but the user didn't ask for these addresses to be
+ * removed from the mapping. To work around this we treat any munmap style
+ * unbinds which require a rebind as a kernel operations (BO eviction or userptr
+ * invalidation). The first operation waits on the VM's
+ * DMA_RESV_USAGE_PREEMPT_FENCE slots (waits for all pending jobs on VM to
+ * complete / triggers preempt fences) and the last operation is installed in
+ * the VM's DMA_RESV_USAGE_KERNEL slot (blocks future jobs / resume compute mode
+ * VM). The caveat is all dma-resv slots must be updated atomically with respect
+ * to execs and compute mode rebind worker. To accomplish this, hold the
+ * vm->lock in write mode from the first operation until the last.
+ *
+ * Deferred binds in fault mode
+ * ----------------------------
+ *
+ * In a VM is in fault mode (TODO: link to fault mode), new bind operations that
+ * create mappings are by default are deferred to the page fault handler (first
+ * use). This behavior can be overriden by setting the flag
+ * DRM_XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping
+ * immediately.
+ *
+ * User pointer
+ * ============
+ *
+ * User pointers are user allocated memory (malloc'd, mmap'd, etc..) for which the
+ * user wants to create a GPU mapping. Typically in other DRM drivers a dummy BO
+ * was created and then a binding was created. We bypass creating a dummy BO in
+ * XE and simply create a binding directly from the userptr.
+ *
+ * Invalidation
+ * ------------
+ *
+ * Since this a core kernel managed memory the kernel can move this memory
+ * whenever it wants. We register an invalidation MMU notifier to alert XE when
+ * a user poiter is about to move. The invalidation notifier needs to block
+ * until all pending users (jobs or compute mode engines) of the userptr are
+ * idle to ensure no faults. This done by waiting on all of VM's dma-resv slots.
+ *
+ * Rebinds
+ * -------
+ *
+ * Either the next exec (non-compute) or rebind worker (compute mode) will
+ * rebind the userptr. The invalidation MMU notifier kicks the rebind worker
+ * after the VM dma-resv wait if the VM is in compute mode.
+ *
+ * Compute mode
+ * ============
+ *
+ * A VM in compute mode enables long running workloads and ultra low latency
+ * submission (ULLS). ULLS is implemented via a continuously running batch +
+ * semaphores. This enables to the user to insert jump to new batch commands
+ * into the continuously running batch. In both cases these batches exceed the
+ * time a dma fence is allowed to exist for before signaling, as such dma fences
+ * are not used when a VM is in compute mode. User fences (TODO: link user fence
+ * doc) are used instead to signal operation's completion.
+ *
+ * Preempt fences
+ * --------------
+ *
+ * If the kernel decides to move memory around (either userptr invalidate, BO
+ * eviction, or mumap style unbind which results in a rebind) and a batch is
+ * running on an engine, that batch can fault or cause a memory corruption as
+ * page tables for the moved memory are no longer valid. To work around this we
+ * introduce the concept of preempt fences. When sw signaling is enabled on a
+ * preempt fence it tells the submission backend to kick that engine off the
+ * hardware and the preempt fence signals when the engine is off the hardware.
+ * Once all preempt fences are signaled for a VM the kernel can safely move the
+ * memory and kick the rebind worker which resumes all the engines execution.
+ *
+ * A preempt fence, for every engine using the VM, is installed the VM's
+ * dma-resv DMA_RESV_USAGE_PREEMPT_FENCE slot. The same preempt fence, for every
+ * engine using the VM, is also installed into the same dma-resv slot of every
+ * external BO mapped in the VM.
+ *
+ * Rebind worker
+ * -------------
+ *
+ * The rebind worker is very similar to an exec. It is resposible for rebinding
+ * evicted BOs or userptrs, waiting on those operations, installing new preempt
+ * fences, and finally resuming executing of engines in the VM.
+ *
+ * Flow
+ * ~~~~
+ *
+ * .. code-block::
+ *
+ *	<----------------------------------------------------------------------|
+ *	Check if VM is closed, if so bail out                                  |
+ *	Lock VM global lock in read mode                                       |
+ *	Pin userptrs (also finds userptr invalidated since last rebind worker) |
+ *	Lock VM dma-resv and external BOs dma-resv                             |
+ *	Validate BOs that have been evicted                                    |
+ *	Wait on and allocate new preempt fences for every engine using the VM  |
+ *	Rebind invalidated userptrs + evicted BOs                              |
+ *	Wait on last rebind fence                                              |
+ *	Wait VM's DMA_RESV_USAGE_KERNEL dma-resv slot                          |
+ *	Install preeempt fences and issue resume for every engine using the VM |
+ *	Check if any userptrs invalidated since pin                            |
+ *		Squash resume for all engines                                  |
+ *		Unlock all                                                     |
+ *		Wait all VM's dma-resv slots                                   |
+ *		Retry ----------------------------------------------------------
+ *	Release all engines waiting to resume
+ *	Unlock all
+ *
+ * Timeslicing
+ * -----------
+ *
+ * In order to prevent an engine from continuously being kicked off the hardware
+ * and making no forward progress an engine has a period of time it allowed to
+ * run after resume before it can be kicked off again. This effectively gives
+ * each engine a timeslice.
+ *
+ * Handling multiple GTs
+ * =====================
+ *
+ * If a GT has slower access to some regions and the page table structure are in
+ * the slow region, the performance on that GT could adversely be affected. To
+ * work around this we allow a VM page tables to be shadowed in multiple GTs.
+ * When VM is created, a default bind engine and PT table structure are created
+ * on each GT.
+ *
+ * Binds can optionally pass in a mask of GTs where a mapping should be created,
+ * if this mask is zero then default to all the GTs where the VM has page
+ * tables.
+ *
+ * The implementation for this breaks down into a bunch for_each_gt loops in
+ * various places plus exporting a composite fence for multi-GT binds to the
+ * user.
+ *
+ * Fault mode (unified shared memory)
+ * ==================================
+ *
+ * A VM in fault mode can be enabled on devices that support page faults. If
+ * page faults are enabled, using dma fences can potentially induce a deadlock:
+ * A pending page fault can hold up the GPU work which holds up the dma fence
+ * signaling, and memory allocation is usually required to resolve a page
+ * fault, but memory allocation is not allowed to gate dma fence signaling. As
+ * such, dma fences are not allowed when VM is in fault mode. Because dma-fences
+ * are not allowed, long running workloads and ULLS are enabled on a faulting
+ * VM.
+ *
+ * Defered VM binds
+ * ----------------
+ *
+ * By default, on a faulting VM binds just allocate the VMA and the actual
+ * updating of the page tables is defered to the page fault handler. This
+ * behavior can be overridden by setting the flag DRM_XE_VM_BIND_FLAG_IMMEDIATE in
+ * the VM bind which will then do the bind immediately.
+ *
+ * Page fault handler
+ * ------------------
+ *
+ * Page faults are received in the G2H worker under the CT lock which is in the
+ * path of dma fences (no memory allocations are allowed, faults require memory
+ * allocations) thus we cannot process faults under the CT lock. Another issue
+ * is faults issue TLB invalidations which require G2H credits and we cannot
+ * allocate G2H credits in the G2H handlers without deadlocking. Lastly, we do
+ * not want the CT lock to be an outer lock of the VM global lock (VM global
+ * lock required to fault processing).
+ *
+ * To work around the above issue with processing faults in the G2H worker, we
+ * sink faults to a buffer which is large enough to sink all possible faults on
+ * the GT (1 per hardware engine) and kick a worker to process the faults. Since
+ * the page faults G2H are already received in a worker, kicking another worker
+ * adds more latency to a critical performance path. We add a fast path in the
+ * G2H irq handler which looks at first G2H and if it is a page fault we sink
+ * the fault to the buffer and kick the worker to process the fault. TLB
+ * invalidation responses are also in the critical path so these can also be
+ * processed in this fast path.
+ *
+ * Multiple buffers and workers are used and hashed over based on the ASID so
+ * faults from different VMs can be processed in parallel.
+ *
+ * The page fault handler itself is rather simple, flow is below.
+ *
+ * .. code-block::
+ *
+ *	Lookup VM from ASID in page fault G2H
+ *	Lock VM global lock in read mode
+ *	Lookup VMA from address in page fault G2H
+ *	Check if VMA is valid, if not bail
+ *	Check if VMA's BO has backing store, if not allocate
+ *	<----------------------------------------------------------------------|
+ *	If userptr, pin pages                                                  |
+ *	Lock VM & BO dma-resv locks                                            |
+ *	If atomic fault, migrate to VRAM, else validate BO location            |
+ *	Issue rebind                                                           |
+ *	Wait on rebind to complete                                             |
+ *	Check if userptr invalidated since pin                                 |
+ *		Drop VM & BO dma-resv locks                                    |
+ *		Retry ----------------------------------------------------------
+ *	Unlock all
+ *	Issue blocking TLB invalidation                                        |
+ *	Send page fault response to GuC
+ *
+ * Access counters
+ * ---------------
+ *
+ * Access counters can be configured to trigger a G2H indicating the device is
+ * accessing VMAs in system memory frequently as hint to migrate those VMAs to
+ * VRAM.
+ *
+ * Same as the page fault handler, access counters G2H cannot be processed the
+ * G2H worker under the CT lock. Again we use a buffer to sink access counter
+ * G2H. Unlike page faults there is no upper bound so if the buffer is full we
+ * simply drop the G2H. Access counters are a best case optimization and it is
+ * safe to drop these unlike page faults.
+ *
+ * The access counter handler itself is rather simple flow is below.
+ *
+ * .. code-block::
+ *
+ *	Lookup VM from ASID in access counter G2H
+ *	Lock VM global lock in read mode
+ *	Lookup VMA from address in access counter G2H
+ *	If userptr, bail nothing to do
+ *	Lock VM & BO dma-resv locks
+ *	Issue migration to VRAM
+ *	Unlock all
+ *
+ * Notice no rebind is issued in the access counter handler as the rebind will
+ * be issued on next page fault.
+ *
+ * Cavets with eviction / user pointer invalidation
+ * ------------------------------------------------
+ *
+ * In the case of eviction and user pointer invalidation on a faulting VM, there
+ * is no need to issue a rebind rather we just need to blow away the page tables
+ * for the VMAs and the page fault handler will rebind the VMAs when they fault.
+ * The cavet is to update / read the page table structure the VM global lock is
+ * neeeed. In both the case of eviction and user pointer invalidation locks are
+ * held which make acquiring the VM global lock impossible. To work around this
+ * every VMA maintains a list of leaf page table entries which should be written
+ * to zero to blow away the VMA's page tables. After writing zero to these
+ * entries a blocking TLB invalidate is issued. At this point it is safe for the
+ * kernel to move the VMA's memory around. This is a necessary lockless
+ * algorithm and is safe as leafs cannot be changed while either an eviction or
+ * userptr invalidation is occurring.
+ *
+ * Locking
+ * =======
+ *
+ * VM locking protects all of the core data paths (bind operations, execs,
+ * evictions, and compute mode rebind worker) in XE.
+ *
+ * Locks
+ * -----
+ *
+ * VM global lock (vm->lock) - rw semaphore lock. Outer most lock which protects
+ * the list of userptrs mapped in the VM, the list of engines using this VM, and
+ * the array of external BOs mapped in the VM. When adding or removing any of the
+ * aforemented state from the VM should acquire this lock in write mode. The VM
+ * bind path also acquires this lock in write while the exec / compute mode
+ * rebind worker acquire this lock in read mode.
+ *
+ * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv
+ * slots which is shared with any private BO in the VM. Expected to be acquired
+ * during VM binds, execs, and compute mode rebind worker. This lock is also
+ * held when private BOs are being evicted.
+ *
+ * external BO dma-resv lock (bo->ttm.base.resv->lock) - WW lock. Protects
+ * external BO dma-resv slots. Expected to be acquired during VM binds (in
+ * addition to the VM dma-resv lock). All external BO dma-locks within a VM are
+ * expected to be acquired (in addition to the VM dma-resv lock) during execs
+ * and the compute mode rebind worker. This lock is also held when an external
+ * BO is being evicted.
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. An exec and bind operation with the same VM can't be executing at the same
+ * time (vm->lock).
+ *
+ * 2. A compute mode rebind worker and bind operation with the same VM can't be
+ * executing at the same time (vm->lock).
+ *
+ * 3. We can't add / remove userptrs or external BOs to a VM while an exec with
+ * the same VM is executing (vm->lock).
+ *
+ * 4. We can't add / remove userptrs, external BOs, or engines to a VM while a
+ * compute mode rebind worker with the same VM is executing (vm->lock).
+ *
+ * 5. Evictions within a VM can't be happen while an exec with the same VM is
+ * executing (dma-resv locks).
+ *
+ * 6. Evictions within a VM can't be happen while a compute mode rebind worker
+ * with the same VM is executing (dma-resv locks).
+ *
+ * dma-resv usage
+ * ==============
+ *
+ * As previously stated to enforce the ordering of kernel ops (eviction, userptr
+ * invalidation, munmap style unbinds which result in a rebind), rebinds during
+ * execs, execs, and resumes in the rebind worker we use both the VMs and
+ * external BOs dma-resv slots. Let try to make this as clear as possible.
+ *
+ * Slot installation
+ * -----------------
+ *
+ * 1. Jobs from kernel ops install themselves into the DMA_RESV_USAGE_KERNEL
+ * slot of either an external BO or VM (depends on if kernel op is operating on
+ * an external or private BO)
+ *
+ * 2. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_BOOKKEEP slot of the VM
+ *
+ * 3. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_WRITE slot of all external BOs in the VM
+ *
+ * 4. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the VM
+ *
+ * 5. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the external BO (if the bind is to an external BO, this is addition to #4)
+ *
+ * 6. Every engine using a compute mode VM has a preempt fence in installed into
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of the VM
+ *
+ * 7. Every engine using a compute mode VM has a preempt fence in installed into
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of all the external BOs in the VM
+ *
+ * Slot waiting
+ * ------------
+ *
+ * 1. The exection of all jobs from kernel ops shall wait on all slots
+ * (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or VM (depends on if
+ * kernel op is operating on external or private BO)
+ *
+ * 2. In non-compute mode, the exection of all jobs from rebinds in execs shall
+ * wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or VM
+ * (depends on if the rebind is operatiing on an external or private BO)
+ *
+ * 3. In non-compute mode, the exection of all jobs from execs shall wait on the
+ * last rebind job
+ *
+ * 4. In compute mode, the exection of all jobs from rebinds in the rebind
+ * worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO
+ * or VM (depends on if rebind is operating on external or private BO)
+ *
+ * 5. In compute mode, resumes in rebind worker shall wait on last rebind fence
+ *
+ * 6. In compute mode, resumes in rebind worker shall wait on the
+ * DMA_RESV_USAGE_KERNEL slot of the VM
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. New jobs from kernel ops are blocked behind any existing jobs from
+ * non-compute mode execs
+ *
+ * 2. New jobs from non-compute mode execs are blocked behind any existing jobs
+ * from kernel ops and rebinds
+ *
+ * 3. New jobs from kernel ops are blocked behind all preempt fences signaling in
+ * compute mode
+ *
+ * 4. Compute mode engine resumes are blocked behind any existing jobs from
+ * kernel ops and rebinds
+ *
+ * Future work
+ * ===========
+ *
+ * Support large pages for sysmem and userptr.
+ *
+ * Update page faults to handle BOs are page level grainularity (e.g. part of BO
+ * could be in system memory while another part could be in VRAM).
+ *
+ * Page fault handler likely we be optimized a bit more (e.g. Rebinds always
+ * wait on the dma-resv kernel slots of VM or BO, technically we only have to
+ * wait the BO moving. If using a job to do the rebind, we could not block in
+ * the page fault handler rather attach a callback to fence of the rebind job to
+ * signal page fault complete. Our handling of short circuting for atomic faults
+ * for bound VMAs could be better. etc...). We can tune all of this once we have
+ * benchmarks / performance number from workloads up and running.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
new file mode 100644
index 000000000000..63e8a50b88e9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -0,0 +1,373 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_VM_TYPES_H_
+#define _XE_VM_TYPES_H_
+
+#include <drm/drm_gpuvm.h>
+
+#include <linux/dma-resv.h>
+#include <linux/kref.h>
+#include <linux/mmu_notifier.h>
+#include <linux/scatterlist.h>
+
+#include "xe_device_types.h"
+#include "xe_pt_types.h"
+#include "xe_range_fence.h"
+
+struct xe_bo;
+struct xe_sync_entry;
+struct xe_vm;
+
+#define TEST_VM_ASYNC_OPS_ERROR
+#define FORCE_ASYNC_OP_ERROR	BIT(31)
+
+#define XE_VMA_READ_ONLY	DRM_GPUVA_USERBITS
+#define XE_VMA_DESTROYED	(DRM_GPUVA_USERBITS << 1)
+#define XE_VMA_ATOMIC_PTE_BIT	(DRM_GPUVA_USERBITS << 2)
+#define XE_VMA_FIRST_REBIND	(DRM_GPUVA_USERBITS << 3)
+#define XE_VMA_LAST_REBIND	(DRM_GPUVA_USERBITS << 4)
+#define XE_VMA_PTE_4K		(DRM_GPUVA_USERBITS << 5)
+#define XE_VMA_PTE_2M		(DRM_GPUVA_USERBITS << 6)
+#define XE_VMA_PTE_1G		(DRM_GPUVA_USERBITS << 7)
+
+/** struct xe_userptr - User pointer */
+struct xe_userptr {
+	/** @invalidate_link: Link for the vm::userptr.invalidated list */
+	struct list_head invalidate_link;
+	/**
+	 * @notifier: MMU notifier for user pointer (invalidation call back)
+	 */
+	struct mmu_interval_notifier notifier;
+	/** @sgt: storage for a scatter gather table */
+	struct sg_table sgt;
+	/** @sg: allocated scatter gather table */
+	struct sg_table *sg;
+	/** @notifier_seq: notifier sequence number */
+	unsigned long notifier_seq;
+	/**
+	 * @initial_bind: user pointer has been bound at least once.
+	 * write: vm->userptr.notifier_lock in read mode and vm->resv held.
+	 * read: vm->userptr.notifier_lock in write mode or vm->resv held.
+	 */
+	bool initial_bind;
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+	u32 divisor;
+#endif
+};
+
+struct xe_vma {
+	/** @gpuva: Base GPUVA object */
+	struct drm_gpuva gpuva;
+
+	/**
+	 * @combined_links: links into lists which are mutually exclusive.
+	 * Locking: vm lock in write mode OR vm lock in read mode and the vm's
+	 * resv.
+	 */
+	union {
+		/** @userptr: link into VM repin list if userptr. */
+		struct list_head userptr;
+		/** @rebind: link into VM if this VMA needs rebinding. */
+		struct list_head rebind;
+		/** @destroy: link to contested list when VM is being closed. */
+		struct list_head destroy;
+	} combined_links;
+
+	union {
+		/** @destroy_cb: callback to destroy VMA when unbind job is done */
+		struct dma_fence_cb destroy_cb;
+		/** @destroy_work: worker to destroy this BO */
+		struct work_struct destroy_work;
+	};
+
+	/** @usm: unified shared memory state */
+	struct {
+		/** @tile_invalidated: VMA has been invalidated */
+		u8 tile_invalidated;
+	} usm;
+
+	/** @tile_mask: Tile mask of where to create binding for this VMA */
+	u8 tile_mask;
+
+	/**
+	 * @tile_present: GT mask of binding are present for this VMA.
+	 * protected by vm->lock, vm->resv and for userptrs,
+	 * vm->userptr.notifier_lock for writing. Needs either for reading,
+	 * but if reading is done under the vm->lock only, it needs to be held
+	 * in write mode.
+	 */
+	u8 tile_present;
+
+	/**
+	 * @pat_index: The pat index to use when encoding the PTEs for this vma.
+	 */
+	u16 pat_index;
+
+	/**
+	 * @userptr: user pointer state, only allocated for VMAs that are
+	 * user pointers
+	 */
+	struct xe_userptr userptr;
+};
+
+struct xe_device;
+
+struct xe_vm {
+	/** @gpuvm: base GPUVM used to track VMAs */
+	struct drm_gpuvm gpuvm;
+
+	struct xe_device *xe;
+
+	/* exec queue used for (un)binding vma's */
+	struct xe_exec_queue *q[XE_MAX_TILES_PER_DEVICE];
+
+	/** @lru_bulk_move: Bulk LRU move list for this VM's BOs */
+	struct ttm_lru_bulk_move lru_bulk_move;
+
+	u64 size;
+
+	struct xe_pt *pt_root[XE_MAX_TILES_PER_DEVICE];
+	struct xe_pt *scratch_pt[XE_MAX_TILES_PER_DEVICE][XE_VM_MAX_LEVEL];
+
+	/**
+	 * @flags: flags for this VM, statically setup a creation time aside
+	 * from XE_VM_FLAG_BANNED which requires vm->lock to set / read safely
+	 */
+#define XE_VM_FLAG_64K			BIT(0)
+#define XE_VM_FLAG_LR_MODE		BIT(1)
+#define XE_VM_FLAG_MIGRATION		BIT(2)
+#define XE_VM_FLAG_SCRATCH_PAGE		BIT(3)
+#define XE_VM_FLAG_FAULT_MODE		BIT(4)
+#define XE_VM_FLAG_BANNED		BIT(5)
+#define XE_VM_FLAG_TILE_ID(flags)	FIELD_GET(GENMASK(7, 6), flags)
+#define XE_VM_FLAG_SET_TILE_ID(tile)	FIELD_PREP(GENMASK(7, 6), (tile)->id)
+	unsigned long flags;
+
+	/** @composite_fence_ctx: context composite fence */
+	u64 composite_fence_ctx;
+	/** @composite_fence_seqno: seqno for composite fence */
+	u32 composite_fence_seqno;
+
+	/**
+	 * @lock: outer most lock, protects objects of anything attached to this
+	 * VM
+	 */
+	struct rw_semaphore lock;
+
+	/**
+	 * @rebind_list: list of VMAs that need rebinding. Protected by the
+	 * vm->lock in write mode, OR (the vm->lock in read mode and the
+	 * vm resv).
+	 */
+	struct list_head rebind_list;
+
+	/** @rebind_fence: rebind fence from execbuf */
+	struct dma_fence *rebind_fence;
+
+	/**
+	 * @destroy_work: worker to destroy VM, needed as a dma_fence signaling
+	 * from an irq context can be last put and the destroy needs to be able
+	 * to sleep.
+	 */
+	struct work_struct destroy_work;
+
+	/**
+	 * @rftree: range fence tree to track updates to page table structure.
+	 * Used to implement conflict tracking between independent bind engines.
+	 */
+	struct xe_range_fence_tree rftree[XE_MAX_TILES_PER_DEVICE];
+
+	/** @async_ops: async VM operations (bind / unbinds) */
+	struct {
+		/** @list: list of pending async VM ops */
+		struct list_head pending;
+		/** @work: worker to execute async VM ops */
+		struct work_struct work;
+		/** @lock: protects list of pending async VM ops and fences */
+		spinlock_t lock;
+		/** @fence: fence state */
+		struct {
+			/** @context: context of async fence */
+			u64 context;
+			/** @seqno: seqno of async fence */
+			u32 seqno;
+		} fence;
+		/** @error: error state for async VM ops */
+		int error;
+		/**
+		 * @munmap_rebind_inflight: an munmap style VM bind is in the
+		 * middle of a set of ops which requires a rebind at the end.
+		 */
+		bool munmap_rebind_inflight;
+	} async_ops;
+
+	const struct xe_pt_ops *pt_ops;
+
+	/** @userptr: user pointer state */
+	struct {
+		/**
+		 * @userptr.repin_list: list of VMAs which are user pointers,
+		 * and needs repinning. Protected by @lock.
+		 */
+		struct list_head repin_list;
+		/**
+		 * @notifier_lock: protects notifier in write mode and
+		 * submission in read mode.
+		 */
+		struct rw_semaphore notifier_lock;
+		/**
+		 * @userptr.invalidated_lock: Protects the
+		 * @userptr.invalidated list.
+		 */
+		spinlock_t invalidated_lock;
+		/**
+		 * @userptr.invalidated: List of invalidated userptrs, not yet
+		 * picked
+		 * up for revalidation. Protected from access with the
+		 * @invalidated_lock. Removing items from the list
+		 * additionally requires @lock in write mode, and adding
+		 * items to the list requires the @userptr.notifer_lock in
+		 * write mode.
+		 */
+		struct list_head invalidated;
+	} userptr;
+
+	/** @preempt: preempt state */
+	struct {
+		/**
+		 * @min_run_period_ms: The minimum run period before preempting
+		 * an engine again
+		 */
+		s64 min_run_period_ms;
+		/** @exec_queues: list of exec queues attached to this VM */
+		struct list_head exec_queues;
+		/** @num_exec_queues: number exec queues attached to this VM */
+		int num_exec_queues;
+		/**
+		 * @rebind_deactivated: Whether rebind has been temporarily deactivated
+		 * due to no work available. Protected by the vm resv.
+		 */
+		bool rebind_deactivated;
+		/**
+		 * @rebind_work: worker to rebind invalidated userptrs / evicted
+		 * BOs
+		 */
+		struct work_struct rebind_work;
+	} preempt;
+
+	/** @um: unified memory state */
+	struct {
+		/** @asid: address space ID, unique to each VM */
+		u32 asid;
+		/**
+		 * @last_fault_vma: Last fault VMA, used for fast lookup when we
+		 * get a flood of faults to the same VMA
+		 */
+		struct xe_vma *last_fault_vma;
+	} usm;
+
+	/** @error_capture: allow to track errors */
+	struct {
+		/** @capture_once: capture only one error per VM */
+		bool capture_once;
+	} error_capture;
+
+	/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
+	bool batch_invalidate_tlb;
+	/** @xef: XE file handle for tracking this VM's drm client */
+	struct xe_file *xef;
+};
+
+/** struct xe_vma_op_map - VMA map operation */
+struct xe_vma_op_map {
+	/** @vma: VMA to map */
+	struct xe_vma *vma;
+	/** @immediate: Immediate bind */
+	bool immediate;
+	/** @read_only: Read only */
+	bool read_only;
+	/** @is_null: is NULL binding */
+	bool is_null;
+	/** @pat_index: The pat index to use for this operation. */
+	u16 pat_index;
+};
+
+/** struct xe_vma_op_remap - VMA remap operation */
+struct xe_vma_op_remap {
+	/** @prev: VMA preceding part of a split mapping */
+	struct xe_vma *prev;
+	/** @next: VMA subsequent part of a split mapping */
+	struct xe_vma *next;
+	/** @start: start of the VMA unmap */
+	u64 start;
+	/** @range: range of the VMA unmap */
+	u64 range;
+	/** @skip_prev: skip prev rebind */
+	bool skip_prev;
+	/** @skip_next: skip next rebind */
+	bool skip_next;
+	/** @unmap_done: unmap operation in done */
+	bool unmap_done;
+};
+
+/** struct xe_vma_op_prefetch - VMA prefetch operation */
+struct xe_vma_op_prefetch {
+	/** @region: memory region to prefetch to */
+	u32 region;
+};
+
+/** enum xe_vma_op_flags - flags for VMA operation */
+enum xe_vma_op_flags {
+	/** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
+	XE_VMA_OP_FIRST			= BIT(0),
+	/** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
+	XE_VMA_OP_LAST			= BIT(1),
+	/** @XE_VMA_OP_COMMITTED: VMA operation committed */
+	XE_VMA_OP_COMMITTED		= BIT(2),
+	/** @XE_VMA_OP_PREV_COMMITTED: Previous VMA operation committed */
+	XE_VMA_OP_PREV_COMMITTED	= BIT(3),
+	/** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation committed */
+	XE_VMA_OP_NEXT_COMMITTED	= BIT(4),
+};
+
+/** struct xe_vma_op - VMA operation */
+struct xe_vma_op {
+	/** @base: GPUVA base operation */
+	struct drm_gpuva_op base;
+	/**
+	 * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
+	 * operations is processed
+	 */
+	struct drm_gpuva_ops *ops;
+	/** @q: exec queue for this operation */
+	struct xe_exec_queue *q;
+	/**
+	 * @syncs: syncs for this operation, only used on first and last
+	 * operation
+	 */
+	struct xe_sync_entry *syncs;
+	/** @num_syncs: number of syncs */
+	u32 num_syncs;
+	/** @link: async operation link */
+	struct list_head link;
+	/** @flags: operation flags */
+	enum xe_vma_op_flags flags;
+
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+	/** @inject_error: inject error to test async op error handling */
+	bool inject_error;
+#endif
+
+	union {
+		/** @map: VMA map operation specific data */
+		struct xe_vma_op_map map;
+		/** @remap: VMA remap operation specific data */
+		struct xe_vma_op_remap remap;
+		/** @prefetch: VMA prefetch operation specific data */
+		struct xe_vma_op_prefetch prefetch;
+	};
+};
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
new file mode 100644
index 000000000000..5f61dd87c586
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -0,0 +1,895 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wa.h"
+
+#include <drm/drm_managed.h>
+#include <kunit/visibility.h>
+#include <linux/compiler_types.h>
+
+#include "generated/xe_wa_oob.h"
+#include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_types.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "xe_rtp.h"
+#include "xe_step.h"
+
+/**
+ * DOC: Hardware workarounds
+ *
+ * Hardware workarounds are register programming documented to be executed in
+ * the driver that fall outside of the normal programming sequences for a
+ * platform. There are some basic categories of workarounds, depending on
+ * how/when they are applied:
+ *
+ * - LRC workarounds: workarounds that touch registers that are
+ *   saved/restored to/from the HW context image. The list is emitted (via Load
+ *   Register Immediate commands) once when initializing the device and saved in
+ *   the default context. That default context is then used on every context
+ *   creation to have a "primed golden context", i.e. a context image that
+ *   already contains the changes needed to all the registers.
+ *
+ * - Engine workarounds: the list of these WAs is applied whenever the specific
+ *   engine is reset. It's also possible that a set of engine classes share a
+ *   common power domain and they are reset together. This happens on some
+ *   platforms with render and compute engines. In this case (at least) one of
+ *   them need to keeep the workaround programming: the approach taken in the
+ *   driver is to tie those workarounds to the first compute/render engine that
+ *   is registered.  When executing with GuC submission, engine resets are
+ *   outside of kernel driver control, hence the list of registers involved in
+ *   written once, on engine initialization, and then passed to GuC, that
+ *   saves/restores their values before/after the reset takes place. See
+ *   ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference.
+ *
+ * - GT workarounds: the list of these WAs is applied whenever these registers
+ *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
+ *
+ * - Register whitelist: some workarounds need to be implemented in userspace,
+ *   but need to touch privileged registers. The whitelist in the kernel
+ *   instructs the hardware to allow the access to happen. From the kernel side,
+ *   this is just a special case of a MMIO workaround (as we write the list of
+ *   these to/be-whitelisted registers to some special HW registers).
+ *
+ * - Workaround batchbuffers: buffers that get executed automatically by the
+ *   hardware on every HW context restore. These buffers are created and
+ *   programmed in the default context so the hardware always go through those
+ *   programming sequences when switching contexts. The support for workaround
+ *   batchbuffers is enabled these hardware mechanisms:
+ *
+ *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
+ *      context, pointing the hardware to jump to that location when that offset
+ *      is reached in the context restore. Workaround batchbuffer in the driver
+ *      currently uses this mechanism for all platforms.
+ *
+ *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
+ *      pointing the hardware to a buffer to continue executing after the
+ *      engine registers are restored in a context restore sequence. This is
+ *      currently not used in the driver.
+ *
+ * - Other/OOB:  There are WAs that, due to their nature, cannot be applied from
+ *   a central place. Those are peppered around the rest of the code, as needed.
+ *   Workarounds related to the display IP are the main example.
+ *
+ * .. [1] Technically, some registers are powercontext saved & restored, so they
+ *    survive a suspend/resume. In practice, writing them again is not too
+ *    costly and simplifies things, so it's the approach taken in the driver.
+ *
+ * .. note::
+ *    Hardware workarounds in xe work the same way as in i915, with the
+ *    difference of how they are maintained in the code. In xe it uses the
+ *    xe_rtp infrastructure so the workarounds can be kept in tables, following
+ *    a more declarative approach rather than procedural.
+ */
+
+#undef XE_REG_MCR
+#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
+
+__diag_push();
+__diag_ignore_all("-Woverride-init", "Allow field overrides in table");
+
+static const struct xe_rtp_entry_sr gt_was[] = {
+	{ XE_RTP_NAME("14011060649"),
+	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255),
+		       ENGINE_CLASS(VIDEO_DECODE),
+		       FUNC(xe_rtp_match_even_instance)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+	{ XE_RTP_NAME("14011059788"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_ACTIONS(SET(DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE))
+	},
+	{ XE_RTP_NAME("14015795083"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260)),
+	  XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
+	},
+
+	/* DG1 */
+
+	{ XE_RTP_NAME("1409420604"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("1408615072"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE2_DIS))
+	},
+
+	/* DG2 */
+
+	{ XE_RTP_NAME("16010515920"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10),
+		       GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+	{ XE_RTP_NAME("22010523718"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
+	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14011006942"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
+	  XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14012362059"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("14012362059"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("14010948348"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14011037102"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14011371254"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14011431319"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(UNSLCGCTL9440,
+			     GAMTLBOACS_CLKGATE_DIS |
+			     GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS |
+			     GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS |
+			     GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS |
+			     GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS |
+			     GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS |
+			     GAMTLBBLT_CLKGATE_DIS),
+			 SET(UNSLCGCTL9444,
+			     GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS |
+			     GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS |
+			     GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS |
+			     GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS |
+			     GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS |
+			     GAMTLBMERT_CLKGATE_DIS |
+			     GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS |
+			     GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14010569222"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14011028019"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14010680813"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_GAMSTLB_CTRL,
+			     CONTROL_BLOCK_CLKGATE_DIS |
+			     EGRESS_BLOCK_CLKGATE_DIS |
+			     TAG_BLOCK_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14014830051"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(CLR(SARB_CHICKEN1, COMP_CKN_IN))
+	},
+	{ XE_RTP_NAME("18018781329"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(COMP_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("1509235366"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_GAMCNTRL_CTRL,
+			     INVALIDATION_BROADCAST_MODE_DIS |
+			     GLOBAL_INVALIDATION_MODE))
+	},
+	{ XE_RTP_NAME("14010648519"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_L3NODEARBCFG, XEHP_LNESPARE))
+	},
+
+	/* PVC */
+
+	{ XE_RTP_NAME("18018781329"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(COMP_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB),
+			 SET(XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("16016694945"),
+	  XE_RTP_RULES(PLATFORM(PVC)),
+	  XE_RTP_ACTIONS(SET(XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC))
+	},
+
+	/* Xe_LPG */
+
+	{ XE_RTP_NAME("14015795083"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
+	},
+	{ XE_RTP_NAME("14018575942"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+	  XE_RTP_ACTIONS(SET(COMP_MOD_CTRL, FORCE_MISS_FTLB))
+	},
+	{ XE_RTP_NAME("22016670082"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+	  XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR))
+	},
+
+	/* Xe_LPM+ */
+
+	{ XE_RTP_NAME("16021867713"),
+	  XE_RTP_RULES(MEDIA_VERSION(1300),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+	{ XE_RTP_NAME("22016670082"),
+	  XE_RTP_RULES(MEDIA_VERSION(1300)),
+	  XE_RTP_ACTIONS(SET(XELPMP_SQCNT1, ENFORCE_RAR))
+	},
+
+	/* Xe2_LPG */
+
+	{ XE_RTP_NAME("16020975621"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, SBEUNIT_CLKGATE_DIS))
+	},
+	{ XE_RTP_NAME("14018157293"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHPC_L3CLOS_MASK(0), ~0),
+			 SET(XEHPC_L3CLOS_MASK(1), ~0),
+			 SET(XEHPC_L3CLOS_MASK(2), ~0),
+			 SET(XEHPC_L3CLOS_MASK(3), ~0))
+	},
+
+	/* Xe2_LPM */
+
+	{ XE_RTP_NAME("14017421178"),
+	  XE_RTP_RULES(MEDIA_VERSION(2000),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+	{ XE_RTP_NAME("16021867713"),
+	  XE_RTP_RULES(MEDIA_VERSION(2000),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+	{ XE_RTP_NAME("14019449301"),
+	  XE_RTP_RULES(MEDIA_VERSION(2000), ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+
+	{}
+};
+
+static const struct xe_rtp_entry_sr engine_was[] = {
+	{ XE_RTP_NAME("22010931296, 18011464164, 14010919138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(FF_THREAD_MODE(RENDER_RING_BASE),
+			     FF_TESSELATION_DOP_GATE_DISABLE))
+	},
+	{ XE_RTP_NAME("1409804808"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
+		       ENGINE_CLASS(RENDER),
+		       IS_INTEGRATED),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, PUSH_CONST_DEREF_HOLD_DIS))
+	},
+	{ XE_RTP_NAME("14010229206, 1409085225"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200),
+		       ENGINE_CLASS(RENDER),
+		       IS_INTEGRATED),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
+	},
+	{ XE_RTP_NAME("1606931601"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_EARLY_READ))
+	},
+	{ XE_RTP_NAME("14010826681, 1606700617, 22010271021, 18019627453"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1255), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(CS_DEBUG_MODE1(RENDER_RING_BASE),
+			     FF_DOP_CLOCK_GATE_DISABLE))
+	},
+	{ XE_RTP_NAME("1406941453"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, ENABLE_SMALLPL))
+	},
+	{ XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN1(RENDER_RING_BASE),
+			     FFSC_PERCTX_PREEMPT_CTRL))
+	},
+
+	/* TGL */
+
+	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
+	  XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
+			     WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+			     RC_SEMA_IDLE_MSG_DISABLE))
+	},
+
+	/* RKL */
+
+	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
+	  XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
+			     WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+			     RC_SEMA_IDLE_MSG_DISABLE))
+	},
+
+	/* ADL-P */
+
+	{ XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
+	  XE_RTP_RULES(PLATFORM(ALDERLAKE_P), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE),
+			     WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+			     RC_SEMA_IDLE_MSG_DISABLE))
+	},
+
+	/* DG2 */
+
+	{ XE_RTP_NAME("22013037850"),
+	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW,
+			     DISABLE_128B_EVICTION_COMMAND_UDW))
+	},
+	{ XE_RTP_NAME("22014226127"),
+	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE))
+	},
+	{ XE_RTP_NAME("18017747507"),
+	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(VFG_PREEMPTION_CHICKEN,
+			     POLYGON_TRIFAN_LINELOOP_DISABLE))
+	},
+	{ XE_RTP_NAME("22012826095, 22013059131"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW,
+				   MAXREQS_PER_BANK,
+				   REG_FIELD_PREP(MAXREQS_PER_BANK, 2)))
+	},
+	{ XE_RTP_NAME("22012826095, 22013059131"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW,
+				   MAXREQS_PER_BANK,
+				   REG_FIELD_PREP(MAXREQS_PER_BANK, 2)))
+	},
+	{ XE_RTP_NAME("22013059131"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT))
+	},
+	{ XE_RTP_NAME("22013059131"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT))
+	},
+	{ XE_RTP_NAME("14010918519"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW,
+			     FORCE_SLM_FENCE_SCOPE_TO_TILE |
+			     FORCE_UGM_FENCE_SCOPE_TO_TILE,
+			     /*
+			      * Ignore read back as it always returns 0 in these
+			      * steps
+			      */
+			     .read_mask = 0))
+	},
+	{ XE_RTP_NAME("14015227452"),
+	  XE_RTP_RULES(PLATFORM(DG2),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE))
+	},
+	{ XE_RTP_NAME("16015675438"),
+	  XE_RTP_RULES(PLATFORM(DG2),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE),
+			     PERF_FIX_BALANCING_CFE_DISABLE))
+	},
+	{ XE_RTP_NAME("18028616096"),
+	  XE_RTP_RULES(PLATFORM(DG2),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3))
+	},
+	{ XE_RTP_NAME("16011620976, 22015475538"),
+	  XE_RTP_RULES(PLATFORM(DG2),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8))
+	},
+	{ XE_RTP_NAME("22012654132"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC,
+			     /*
+			      * Register can't be read back for verification on
+			      * DG2 due to Wa_14012342262
+			      */
+			     .read_mask = 0))
+	},
+	{ XE_RTP_NAME("22012654132"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC,
+			     /*
+			      * Register can't be read back for verification on
+			      * DG2 due to Wa_14012342262
+			      */
+			     .read_mask = 0))
+	},
+	{ XE_RTP_NAME("1509727124"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB))
+	},
+	{ XE_RTP_NAME("22012856258"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION))
+	},
+	{ XE_RTP_NAME("14013392000"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE))
+	},
+	{ XE_RTP_NAME("14012419201"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
+			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX))
+	},
+	{ XE_RTP_NAME("14012419201"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
+			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX))
+	},
+	{ XE_RTP_NAME("1308578152"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
+		       ENGINE_CLASS(RENDER),
+		       FUNC(xe_rtp_match_first_gslice_fused_off)),
+	  XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1(RENDER_RING_BASE),
+			     REPLAY_MODE_GRANULARITY))
+	},
+	{ XE_RTP_NAME("22010960976, 14013347512"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(CLR(XEHP_HDC_CHICKEN0,
+			     LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK))
+	},
+	{ XE_RTP_NAME("1608949956, 14010198302"),
+	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN,
+			     MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE))
+	},
+	{ XE_RTP_NAME("22010430635"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
+			     DISABLE_GRF_CLEAR))
+	},
+	{ XE_RTP_NAME("14013202645"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
+	},
+	{ XE_RTP_NAME("14013202645"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
+	},
+	{ XE_RTP_NAME("22012532006"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
+			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA))
+	},
+	{ XE_RTP_NAME("22012532006"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
+			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA))
+	},
+	{ XE_RTP_NAME("14015150844"),
+	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES,
+			     XE_RTP_NOCHECK))
+	},
+
+	/* PVC */
+
+	{ XE_RTP_NAME("22014226127"),
+	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE))
+	},
+	{ XE_RTP_NAME("14015227452"),
+	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE))
+	},
+	{ XE_RTP_NAME("16015675438"),
+	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE),
+			     PERF_FIX_BALANCING_CFE_DISABLE))
+	},
+	{ XE_RTP_NAME("14014999345"),
+	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE),
+		       GRAPHICS_STEP(B0, C0)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC))
+	},
+
+	/* Xe_LPG */
+
+	{ XE_RTP_NAME("14017856879"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN3, DIS_FIX_EOT1_FLUSH))
+	},
+	{ XE_RTP_NAME("14015150844"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES,
+			     XE_RTP_NOCHECK))
+	},
+
+	/* Xe2_LPG */
+
+	{ XE_RTP_NAME("18032247524"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE))
+	},
+	{ XE_RTP_NAME("16018712365"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS))
+	},
+	{ XE_RTP_NAME("14018957109"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE))
+	},
+	{ XE_RTP_NAME("16021540221"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
+	},
+	{ XE_RTP_NAME("14019322943"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, TGM_WRITE_EOM_FORCE))
+	},
+	{ XE_RTP_NAME("14018471104"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL))
+	},
+	{ XE_RTP_NAME("16018737384"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS))
+	},
+	/*
+	 * These two workarounds are the same, just applying to different
+	 * engines.  Although Wa_18032095049 (for the RCS) isn't required on
+	 * all steppings, disabling these reports has no impact for our
+	 * driver or the GuC, so we go ahead and treat it the same as
+	 * Wa_16021639441 which does apply to all steppings.
+	 */
+	{ XE_RTP_NAME("18032095049, 16021639441"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004)),
+	  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
+			     GHWSP_CSB_REPORT_DIS |
+			     PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+
+	{}
+};
+
+static const struct xe_rtp_entry_sr lrc_was[] = {
+	{ XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN3,
+			     DISABLE_CPS_AWARE_COLOR_PIPE))
+	},
+	{ XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+	  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(RENDER_RING_BASE),
+				   PREEMPT_GPGPU_LEVEL_MASK,
+				   PREEMPT_GPGPU_THREAD_GROUP_LEVEL))
+	},
+	{ XE_RTP_NAME("1806527549"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200)),
+	  XE_RTP_ACTIONS(SET(HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE))
+	},
+	{ XE_RTP_NAME("1606376872"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(1200)),
+	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC))
+	},
+
+	/* DG1 */
+
+	{ XE_RTP_NAME("1409044764"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_ACTIONS(CLR(COMMON_SLICE_CHICKEN3,
+			     DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN))
+	},
+	{ XE_RTP_NAME("22010493298"),
+	  XE_RTP_RULES(PLATFORM(DG1)),
+	  XE_RTP_ACTIONS(SET(HIZ_CHICKEN,
+			     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE))
+	},
+
+	/* DG2 */
+
+	{ XE_RTP_NAME("16011186671"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH),
+			 SET(VFLSKPD, DIS_OVER_FETCH_CACHE))
+	},
+	{ XE_RTP_NAME("14010469329"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
+			     XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE))
+	},
+	{ XE_RTP_NAME("14010698770, 22010613112, 22010465075"),
+	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
+	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
+			     DISABLE_CPS_AWARE_COLOR_PIPE))
+	},
+	{ XE_RTP_NAME("16013271637"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1,
+			     MSC_MSAA_REODER_BUF_BYPASS_DISABLE))
+	},
+	{ XE_RTP_NAME("14014947963"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(FIELD_SET(VF_PREEMPTION,
+				   PREEMPTION_VERTEX_COUNT,
+				   0x4000))
+	},
+	{ XE_RTP_NAME("18018764978"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_MODE2,
+			     SCOREBOARD_STALL_FLUSH_CONTROL))
+	},
+	{ XE_RTP_NAME("18019271663"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
+	},
+	{ XE_RTP_NAME("14019877138"),
+	  XE_RTP_RULES(PLATFORM(DG2)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+	},
+
+	/* PVC */
+
+	{ XE_RTP_NAME("16017236439"),
+	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COPY),
+		       FUNC(xe_rtp_match_even_instance)),
+	  XE_RTP_ACTIONS(SET(BCS_SWCTRL(0),
+			     BCS_SWCTRL_DISABLE_256B,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE))),
+	},
+
+	/* Xe_LPG */
+
+	{ XE_RTP_NAME("18019271663"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
+	},
+
+	/* Xe2_LPG */
+
+	{ XE_RTP_NAME("16020518922"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(FF_MODE,
+			     DIS_TE_AUTOSTRIP |
+			     DIS_MESH_PARTIAL_AUTOSTRIP |
+			     DIS_MESH_AUTOSTRIP),
+			 SET(VFLSKPD,
+			     DIS_PARTIAL_AUTOSTRIP |
+			     DIS_AUTOSTRIP))
+	},
+	{ XE_RTP_NAME("14019386621"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE))
+	},
+	{ XE_RTP_NAME("14019877138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+	},
+	{ XE_RTP_NAME("14020013138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
+	},
+	{ XE_RTP_NAME("14019988906"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
+	},
+
+	{}
+};
+
+static __maybe_unused const struct xe_rtp_entry oob_was[] = {
+#include <generated/xe_wa_oob.c>
+	{}
+};
+
+static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT);
+
+__diag_pop();
+
+/**
+ * xe_wa_process_oob - process OOB workaround table
+ * @gt: GT instance to process workarounds for
+ *
+ * Process OOB workaround table for this platform, marking in @gt the
+ * workarounds that are active.
+ */
+void xe_wa_process_oob(struct xe_gt *gt)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.oob,
+						  ARRAY_SIZE(oob_was));
+	xe_rtp_process(&ctx, oob_was);
+}
+
+/**
+ * xe_wa_process_gt - process GT workaround table
+ * @gt: GT instance to process workarounds for
+ *
+ * Process GT workaround table for this platform, saving in @gt all the
+ * workarounds that need to be applied at the GT level.
+ */
+void xe_wa_process_gt(struct xe_gt *gt)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.gt,
+						  ARRAY_SIZE(gt_was));
+	xe_rtp_process_to_sr(&ctx, gt_was, &gt->reg_sr);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);
+
+/**
+ * xe_wa_process_engine - process engine workaround table
+ * @hwe: engine instance to process workarounds for
+ *
+ * Process engine workaround table for this platform, saving in @hwe all the
+ * workarounds that need to be applied at the engine level that match this
+ * engine.
+ */
+void xe_wa_process_engine(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.engine,
+						  ARRAY_SIZE(engine_was));
+	xe_rtp_process_to_sr(&ctx, engine_was, &hwe->reg_sr);
+}
+
+/**
+ * xe_wa_process_lrc - process context workaround table
+ * @hwe: engine instance to process workarounds for
+ *
+ * Process context workaround table for this platform, saving in @hwe all the
+ * workarounds that need to be applied on context restore. These are workarounds
+ * touching registers that are part of the HW context image.
+ */
+void xe_wa_process_lrc(struct xe_hw_engine *hwe)
+{
+	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
+
+	xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.lrc,
+						  ARRAY_SIZE(lrc_was));
+	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
+}
+
+/**
+ * xe_wa_init - initialize gt with workaround bookkeeping
+ * @gt: GT instance to initialize
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_wa_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	size_t n_oob, n_lrc, n_engine, n_gt, total;
+	unsigned long *p;
+
+	n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_was));
+	n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_was));
+	n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_was));
+	n_oob = BITS_TO_LONGS(ARRAY_SIZE(oob_was));
+	total = n_gt + n_engine + n_lrc + n_oob;
+
+	p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	gt->wa_active.gt = p;
+	p += n_gt;
+	gt->wa_active.engine = p;
+	p += n_engine;
+	gt->wa_active.lrc = p;
+	p += n_lrc;
+	gt->wa_active.oob = p;
+
+	return 0;
+}
+
+void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	size_t idx;
+
+	drm_printf(p, "GT Workarounds\n");
+	for_each_set_bit(idx, gt->wa_active.gt, ARRAY_SIZE(gt_was))
+		drm_printf_indent(p, 1, "%s\n", gt_was[idx].name);
+
+	drm_printf(p, "\nEngine Workarounds\n");
+	for_each_set_bit(idx, gt->wa_active.engine, ARRAY_SIZE(engine_was))
+		drm_printf_indent(p, 1, "%s\n", engine_was[idx].name);
+
+	drm_printf(p, "\nLRC Workarounds\n");
+	for_each_set_bit(idx, gt->wa_active.lrc, ARRAY_SIZE(lrc_was))
+		drm_printf_indent(p, 1, "%s\n", lrc_was[idx].name);
+
+	drm_printf(p, "\nOOB Workarounds\n");
+	for_each_set_bit(idx, gt->wa_active.oob, ARRAY_SIZE(oob_was))
+		if (oob_was[idx].name)
+			drm_printf_indent(p, 1, "%s\n", oob_was[idx].name);
+}
+
+/*
+ * Apply tile (non-GT, non-display) workarounds.  Think very carefully before
+ * adding anything to this function; most workarounds should be implemented
+ * elsewhere.  The programming here is primarily for sgunit/soc workarounds,
+ * which are relatively rare.  Since the registers these workarounds target are
+ * outside the GT, they should only need to be applied once at device
+ * probe/resume; they will not lose their values on any kind of GT or engine
+ * reset.
+ *
+ * TODO:  We may want to move this over to xe_rtp in the future once we have
+ * enough workarounds to justify the work.
+ */
+void xe_wa_apply_tile_workarounds(struct xe_tile *tile)
+{
+	struct xe_gt *mmio = tile->primary_gt;
+
+	if (XE_WA(mmio, 22010954014))
+		xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS);
+}
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
new file mode 100644
index 000000000000..1b24d66f9d80
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WA_
+#define _XE_WA_
+
+struct drm_printer;
+struct xe_gt;
+struct xe_hw_engine;
+struct xe_tile;
+
+int xe_wa_init(struct xe_gt *gt);
+void xe_wa_process_oob(struct xe_gt *gt);
+void xe_wa_process_gt(struct xe_gt *gt);
+void xe_wa_process_engine(struct xe_hw_engine *hwe);
+void xe_wa_process_lrc(struct xe_hw_engine *hwe);
+void xe_wa_apply_tile_workarounds(struct xe_tile *tile);
+
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
+void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
+
+/**
+ * XE_WA - Out-of-band workarounds, that don't fit the lifecycle any
+ *         other more specific type
+ * @gt__: gt instance
+ * @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h
+ */
+#define XE_WA(gt__, id__) test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
new file mode 100644
index 000000000000..727bdc429212
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -0,0 +1,24 @@
+22012773006	GRAPHICS_VERSION_RANGE(1200, 1250)
+16011759253	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)
+14014475959	GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)
+		PLATFORM(DG2)
+22011391025	PLATFORM(DG2)
+14012197797	PLATFORM(DG2), GRAPHICS_STEP(A0, B0)
+16011777198	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
+		SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)
+22012727170	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
+		SUBPLATFORM(DG2, G11)
+22012727685	SUBPLATFORM(DG2, G11)
+16015675438	PLATFORM(PVC)
+		SUBPLATFORM(DG2, G10)
+		SUBPLATFORM(DG2, G12)
+18020744125	PLATFORM(PVC)
+1509372804	PLATFORM(PVC), GRAPHICS_STEP(A0, C0)
+1409600907	GRAPHICS_VERSION_RANGE(1200, 1250)
+14016763929	SUBPLATFORM(DG2, G10)
+		SUBPLATFORM(DG2, G12)
+16017236439	PLATFORM(PVC)
+22010954014	PLATFORM(DG2)
+14019821291	MEDIA_VERSION_RANGE(1300, 2000)
+14015076503	MEDIA_VERSION(1300)
+16020292621	GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
new file mode 100644
index 000000000000..a75eeba7bfe5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wait_user_fence.h"
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_utils.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_exec_queue.h"
+
+static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
+{
+	u64 rvalue;
+	int err;
+	bool passed;
+
+	err = copy_from_user(&rvalue, u64_to_user_ptr(addr), sizeof(rvalue));
+	if (err)
+		return -EFAULT;
+
+	switch (op) {
+	case DRM_XE_UFENCE_WAIT_OP_EQ:
+		passed = (rvalue & mask) == (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_NEQ:
+		passed = (rvalue & mask) != (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_GT:
+		passed = (rvalue & mask) > (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_GTE:
+		passed = (rvalue & mask) >= (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_LT:
+		passed = (rvalue & mask) < (value & mask);
+		break;
+	case DRM_XE_UFENCE_WAIT_OP_LTE:
+		passed = (rvalue & mask) <= (value & mask);
+		break;
+	default:
+		XE_WARN_ON("Not possible");
+		return -EINVAL;
+	}
+
+	return passed ? 0 : 1;
+}
+
+#define VALID_FLAGS	DRM_XE_UFENCE_WAIT_FLAG_ABSTIME
+#define MAX_OP		DRM_XE_UFENCE_WAIT_OP_LTE
+
+static long to_jiffies_timeout(struct xe_device *xe,
+			       struct drm_xe_wait_user_fence *args)
+{
+	unsigned long long t;
+	long timeout;
+
+	/*
+	 * For negative timeout we want to wait "forever" by setting
+	 * MAX_SCHEDULE_TIMEOUT. But we have to assign this value also
+	 * to args->timeout to avoid being zeroed on the signal delivery
+	 * (see arithmetics after wait).
+	 */
+	if (args->timeout < 0) {
+		args->timeout = MAX_SCHEDULE_TIMEOUT;
+		return MAX_SCHEDULE_TIMEOUT;
+	}
+
+	if (args->timeout == 0)
+		return 0;
+
+	/*
+	 * Save the timeout to an u64 variable because nsecs_to_jiffies
+	 * might return a value that overflows s32 variable.
+	 */
+	if (args->flags & DRM_XE_UFENCE_WAIT_FLAG_ABSTIME)
+		t = drm_timeout_abs_to_jiffies(args->timeout);
+	else
+		t = nsecs_to_jiffies(args->timeout);
+
+	/*
+	 * Anything greater then MAX_SCHEDULE_TIMEOUT is meaningless,
+	 * also we don't want to cap it at MAX_SCHEDULE_TIMEOUT because
+	 * apparently user doesn't mean to wait forever, otherwise the
+	 * args->timeout should have been set to a negative value.
+	 */
+	if (t > MAX_SCHEDULE_TIMEOUT)
+		timeout = MAX_SCHEDULE_TIMEOUT - 1;
+	else
+		timeout = t;
+
+	return timeout ?: 1;
+}
+
+int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	DEFINE_WAIT_FUNC(w_wait, woken_wake_function);
+	struct drm_xe_wait_user_fence *args = data;
+	struct xe_exec_queue *q = NULL;
+	u64 addr = args->addr;
+	int err = 0;
+	long timeout;
+	ktime_t start;
+
+	if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) ||
+	    XE_IOCTL_DBG(xe, args->pad2) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->flags & ~VALID_FLAGS))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->op > MAX_OP))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, addr & 0x7))
+		return -EINVAL;
+
+	if (args->exec_queue_id) {
+		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+		if (XE_IOCTL_DBG(xe, !q))
+			return -ENOENT;
+	}
+
+	timeout = to_jiffies_timeout(xe, args);
+
+	start = ktime_get();
+
+	add_wait_queue(&xe->ufence_wq, &w_wait);
+	for (;;) {
+		err = do_compare(addr, args->value, args->mask, args->op);
+		if (err <= 0)
+			break;
+
+		if (signal_pending(current)) {
+			err = -ERESTARTSYS;
+			break;
+		}
+
+		if (q) {
+			if (q->ops->reset_status(q)) {
+				drm_info(&xe->drm, "exec gueue reset detected\n");
+				err = -EIO;
+				break;
+			}
+		}
+
+		if (!timeout) {
+			err = -ETIME;
+			break;
+		}
+
+		timeout = wait_woken(&w_wait, TASK_INTERRUPTIBLE, timeout);
+	}
+	remove_wait_queue(&xe->ufence_wq, &w_wait);
+
+	if (!(args->flags & DRM_XE_UFENCE_WAIT_FLAG_ABSTIME)) {
+		args->timeout -= ktime_to_ns(ktime_sub(ktime_get(), start));
+		if (args->timeout < 0)
+			args->timeout = 0;
+	}
+
+	if (!timeout && !(err < 0))
+		err = -ETIME;
+
+	if (q)
+		xe_exec_queue_put(q);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.h b/drivers/gpu/drm/xe/xe_wait_user_fence.h
new file mode 100644
index 000000000000..0e268978f9e6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WAIT_USER_FENCE_H_
+#define _XE_WAIT_USER_FENCE_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
new file mode 100644
index 000000000000..d3a99157e523
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wopcm.h"
+
+#include "regs/xe_guc_regs.h"
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+
+/**
+ * DOC: Write Once Protected Content Memory (WOPCM) Layout
+ *
+ * The layout of the WOPCM will be fixed after writing to GuC WOPCM size and
+ * offset registers whose values are calculated and determined by HuC/GuC
+ * firmware size and set of hardware requirements/restrictions as shown below:
+ *
+ * ::
+ *
+ *    +=========> +====================+ <== WOPCM Top
+ *    ^           |  HW contexts RSVD  |
+ *    |     +===> +====================+ <== GuC WOPCM Top
+ *    |     ^     |                    |
+ *    |     |     |                    |
+ *    |     |     |                    |
+ *    |    GuC    |                    |
+ *    |   WOPCM   |                    |
+ *    |    Size   +--------------------+
+ *  WOPCM   |     |    GuC FW RSVD     |
+ *    |     |     +--------------------+
+ *    |     |     |   GuC Stack RSVD   |
+ *    |     |     +------------------- +
+ *    |     v     |   GuC WOPCM RSVD   |
+ *    |     +===> +====================+ <== GuC WOPCM base
+ *    |           |     WOPCM RSVD     |
+ *    |           +------------------- + <== HuC Firmware Top
+ *    v           |      HuC FW        |
+ *    +=========> +====================+ <== WOPCM Base
+ *
+ * GuC accessible WOPCM starts at GuC WOPCM base and ends at GuC WOPCM top.
+ * The top part of the WOPCM is reserved for hardware contexts (e.g. RC6
+ * context).
+ */
+
+/* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */
+/* FIXME: Larger size require for 2 tile PVC, do a proper probe sooner or later */
+#define DGFX_WOPCM_SIZE			SZ_4M
+/* FIXME: Larger size require for MTL, do a proper probe sooner or later */
+#define MTL_WOPCM_SIZE			SZ_4M
+#define WOPCM_SIZE			SZ_2M
+
+#define MAX_WOPCM_SIZE			SZ_8M
+
+/* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */
+#define WOPCM_RESERVED_SIZE		SZ_16K
+
+/* 16KB reserved at the beginning of GuC WOPCM. */
+#define GUC_WOPCM_RESERVED		SZ_16K
+/* 8KB from GUC_WOPCM_RESERVED is reserved for GuC stack. */
+#define GUC_WOPCM_STACK_RESERVED	SZ_8K
+
+/* GuC WOPCM Offset value needs to be aligned to 16KB. */
+#define GUC_WOPCM_OFFSET_ALIGNMENT	(1UL << GUC_WOPCM_OFFSET_SHIFT)
+
+/* 36KB WOPCM reserved at the end of WOPCM */
+#define WOPCM_HW_CTX_RESERVED		(SZ_32K + SZ_4K)
+
+static inline struct xe_gt *wopcm_to_gt(struct xe_wopcm *wopcm)
+{
+	return container_of(wopcm, struct xe_gt, uc.wopcm);
+}
+
+static inline struct xe_device *wopcm_to_xe(struct xe_wopcm *wopcm)
+{
+	return gt_to_xe(wopcm_to_gt(wopcm));
+}
+
+static u32 context_reserved_size(void)
+{
+	return WOPCM_HW_CTX_RESERVED;
+}
+
+static bool __check_layout(struct xe_device *xe, u32 wopcm_size,
+			   u32 guc_wopcm_base, u32 guc_wopcm_size,
+			   u32 guc_fw_size, u32 huc_fw_size)
+{
+	const u32 ctx_rsvd = context_reserved_size();
+	u32 size;
+
+	size = wopcm_size - ctx_rsvd;
+	if (unlikely(guc_wopcm_base >= size ||
+		     guc_wopcm_size > size - guc_wopcm_base)) {
+		drm_err(&xe->drm,
+			"WOPCM: invalid GuC region layout: %uK + %uK > %uK\n",
+			guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K,
+			size / SZ_1K);
+		return false;
+	}
+
+	size = guc_fw_size + GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED;
+	if (unlikely(guc_wopcm_size < size)) {
+		drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n",
+			xe_uc_fw_type_repr(XE_UC_FW_TYPE_GUC),
+			guc_wopcm_size / SZ_1K, size / SZ_1K);
+		return false;
+	}
+
+	size = huc_fw_size + WOPCM_RESERVED_SIZE;
+	if (unlikely(guc_wopcm_base < size)) {
+		drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n",
+			xe_uc_fw_type_repr(XE_UC_FW_TYPE_HUC),
+			guc_wopcm_base / SZ_1K, size / SZ_1K);
+		return false;
+	}
+
+	return true;
+}
+
+static bool __wopcm_regs_locked(struct xe_gt *gt,
+				u32 *guc_wopcm_base, u32 *guc_wopcm_size)
+{
+	u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET);
+	u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE);
+
+	if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) ||
+	    !(reg_base & GUC_WOPCM_OFFSET_VALID))
+		return false;
+
+	*guc_wopcm_base = reg_base & GUC_WOPCM_OFFSET_MASK;
+	*guc_wopcm_size = reg_size & GUC_WOPCM_SIZE_MASK;
+	return true;
+}
+
+static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt,
+			     struct xe_wopcm *wopcm)
+{
+	u32 base = wopcm->guc.base;
+	u32 size = wopcm->guc.size;
+	u32 huc_agent = xe_uc_fw_is_available(&gt->uc.huc.fw) ? HUC_LOADING_AGENT_GUC : 0;
+	u32 mask;
+	int err;
+
+	XE_WARN_ON(!(base & GUC_WOPCM_OFFSET_MASK));
+	XE_WARN_ON(base & ~GUC_WOPCM_OFFSET_MASK);
+	XE_WARN_ON(!(size & GUC_WOPCM_SIZE_MASK));
+	XE_WARN_ON(size & ~GUC_WOPCM_SIZE_MASK);
+
+	mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
+	err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE, size, mask,
+					 size | GUC_WOPCM_SIZE_LOCKED);
+	if (err)
+		goto err_out;
+
+	mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
+	err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET,
+					 base | huc_agent, mask,
+					 base | huc_agent |
+					 GUC_WOPCM_OFFSET_VALID);
+	if (err)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n");
+	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
+		   DMA_GUC_WOPCM_OFFSET.addr,
+		   xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET));
+	drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
+		   GUC_WOPCM_SIZE.addr,
+		   xe_mmio_read32(gt, GUC_WOPCM_SIZE));
+
+	return err;
+}
+
+u32 xe_wopcm_size(struct xe_device *xe)
+{
+	return IS_DGFX(xe) ? DGFX_WOPCM_SIZE :
+		xe->info.platform == XE_METEORLAKE ? MTL_WOPCM_SIZE :
+		WOPCM_SIZE;
+}
+
+/**
+ * xe_wopcm_init() - Initialize the WOPCM structure.
+ * @wopcm: pointer to xe_wopcm.
+ *
+ * This function will partition WOPCM space based on GuC and HuC firmware sizes
+ * and will allocate max remaining for use by GuC. This function will also
+ * enforce platform dependent hardware restrictions on GuC WOPCM offset and
+ * size. It will fail the WOPCM init if any of these checks fail, so that the
+ * following WOPCM registers setup and GuC firmware uploading would be aborted.
+ */
+int xe_wopcm_init(struct xe_wopcm *wopcm)
+{
+	struct xe_device *xe = wopcm_to_xe(wopcm);
+	struct xe_gt *gt = wopcm_to_gt(wopcm);
+	u32 guc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.guc.fw);
+	u32 huc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.huc.fw);
+	u32 ctx_rsvd = context_reserved_size();
+	u32 guc_wopcm_base;
+	u32 guc_wopcm_size;
+	bool locked;
+	int ret = 0;
+
+	if (!guc_fw_size)
+		return -EINVAL;
+
+	wopcm->size = xe_wopcm_size(xe);
+	drm_dbg(&xe->drm, "WOPCM: %uK\n", wopcm->size / SZ_1K);
+
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+	XE_WARN_ON(guc_fw_size >= wopcm->size);
+	XE_WARN_ON(huc_fw_size >= wopcm->size);
+	XE_WARN_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size);
+
+	locked = __wopcm_regs_locked(gt, &guc_wopcm_base, &guc_wopcm_size);
+	if (locked) {
+		drm_dbg(&xe->drm, "GuC WOPCM is already locked [%uK, %uK)\n",
+			guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+		/*
+		 * When the GuC wopcm base and size are preprogrammed by
+		 * BIOS/IFWI, check against the max allowed wopcm size to
+		 * validate if the programmed values align to the wopcm layout.
+		 */
+		wopcm->size = MAX_WOPCM_SIZE;
+
+		goto check;
+	}
+
+	/*
+	 * Aligned value of guc_wopcm_base will determine available WOPCM space
+	 * for HuC firmware and mandatory reserved area.
+	 */
+	guc_wopcm_base = huc_fw_size + WOPCM_RESERVED_SIZE;
+	guc_wopcm_base = ALIGN(guc_wopcm_base, GUC_WOPCM_OFFSET_ALIGNMENT);
+
+	/*
+	 * Need to clamp guc_wopcm_base now to make sure the following math is
+	 * correct. Formal check of whole WOPCM layout will be done below.
+	 */
+	guc_wopcm_base = min(guc_wopcm_base, wopcm->size - ctx_rsvd);
+
+	/* Aligned remainings of usable WOPCM space can be assigned to GuC. */
+	guc_wopcm_size = wopcm->size - ctx_rsvd - guc_wopcm_base;
+	guc_wopcm_size &= GUC_WOPCM_SIZE_MASK;
+
+	drm_dbg(&xe->drm, "Calculated GuC WOPCM [%uK, %uK)\n",
+		guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+
+check:
+	if (__check_layout(xe, wopcm->size, guc_wopcm_base, guc_wopcm_size,
+			   guc_fw_size, huc_fw_size)) {
+		wopcm->guc.base = guc_wopcm_base;
+		wopcm->guc.size = guc_wopcm_size;
+		XE_WARN_ON(!wopcm->guc.base);
+		XE_WARN_ON(!wopcm->guc.size);
+	} else {
+		drm_notice(&xe->drm, "Unsuccessful WOPCM partitioning\n");
+		return -E2BIG;
+	}
+
+	if (!locked)
+		ret = __wopcm_init_regs(xe, gt, wopcm);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_wopcm.h b/drivers/gpu/drm/xe/xe_wopcm.h
new file mode 100644
index 000000000000..0197a282460b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WOPCM_H_
+#define _XE_WOPCM_H_
+
+#include "xe_wopcm_types.h"
+
+struct xe_device;
+
+int xe_wopcm_init(struct xe_wopcm *wopcm);
+u32 xe_wopcm_size(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wopcm_types.h b/drivers/gpu/drm/xe/xe_wopcm_types.h
new file mode 100644
index 000000000000..486d850c4084
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm_types.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WOPCM_TYPES_H_
+#define _XE_WOPCM_TYPES_H_
+
+#include <linux/types.h>
+
+/**
+ * struct xe_wopcm - Overall WOPCM info and WOPCM regions.
+ */
+struct xe_wopcm {
+	/** @size: Size of overall WOPCM */
+	u32 size;
+	/** @guc: GuC WOPCM Region info */
+	struct {
+		/** @base: GuC WOPCM base which is offset from WOPCM base */
+		u32 base;
+		/** @size: Size of the GuC WOPCM region */
+		u32 size;
+	} guc;
+};
+
+#endif
diff --git a/drivers/greybus/Kconfig b/drivers/greybus/Kconfig
index 033d31dbf3b8..ab81ceceb337 100644
--- a/drivers/greybus/Kconfig
+++ b/drivers/greybus/Kconfig
@@ -20,6 +20,7 @@ if GREYBUS
 config GREYBUS_BEAGLEPLAY
 	tristate "Greybus BeaglePlay driver"
 	depends on SERIAL_DEV_BUS
+	select CRC_CCITT
 	help
 	  Select this option if you have a BeaglePlay where CC1352
 	  co-processor acts as Greybus SVC.
diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c
index 8db740214ffd..703666b95bf4 100644
--- a/drivers/hwmon/acpi_power_meter.c
+++ b/drivers/hwmon/acpi_power_meter.c
@@ -31,6 +31,7 @@
 #define POWER_METER_CAN_NOTIFY	(1 << 3)
 #define POWER_METER_IS_BATTERY	(1 << 8)
 #define UNKNOWN_HYSTERESIS	0xFFFFFFFF
+#define UNKNOWN_POWER		0xFFFFFFFF
 
 #define METER_NOTIFY_CONFIG	0x80
 #define METER_NOTIFY_TRIP	0x81
@@ -348,6 +349,9 @@ static ssize_t show_power(struct device *dev,
 	update_meter(resource);
 	mutex_unlock(&resource->lock);
 
+	if (resource->power == UNKNOWN_POWER)
+		return -ENODATA;
+
 	return sprintf(buf, "%llu\n", resource->power * 1000);
 }
 
diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c
index 904890598c11..2c7c92272fe3 100644
--- a/drivers/hwmon/corsair-psu.c
+++ b/drivers/hwmon/corsair-psu.c
@@ -899,7 +899,23 @@ static struct hid_driver corsairpsu_driver = {
 	.reset_resume	= corsairpsu_resume,
 #endif
 };
-module_hid_driver(corsairpsu_driver);
+
+static int __init corsair_init(void)
+{
+	return hid_register_driver(&corsairpsu_driver);
+}
+
+static void __exit corsair_exit(void)
+{
+	hid_unregister_driver(&corsairpsu_driver);
+}
+
+/*
+ * With module_init() the driver would load before the HID bus when
+ * built-in, so use late_initcall() instead.
+ */
+late_initcall(corsair_init);
+module_exit(corsair_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Wilken Gottwalt <wilken.gottwalt@posteo.net>");
diff --git a/drivers/hwmon/ltc2991.c b/drivers/hwmon/ltc2991.c
index bd63c61129a9..fc53fdcb2b6c 100644
--- a/drivers/hwmon/ltc2991.c
+++ b/drivers/hwmon/ltc2991.c
@@ -373,7 +373,7 @@ static int ltc2991_init(struct ltc2991_state *st)
 			   LTC2991_REPEAT_ACQ_EN);
 	if (ret)
 		return dev_err_probe(st->dev, ret,
-				     "Error: Failed to set contiuous mode.\n");
+				     "Error: Failed to set continuous mode.\n");
 
 	/* Enable all channels and trigger conversions */
 	return regmap_write(st->regmap, LTC2991_CH_EN_TRIGGER,
diff --git a/drivers/hwmon/max31827.c b/drivers/hwmon/max31827.c
index fd1fed1a797c..a1ce65145669 100644
--- a/drivers/hwmon/max31827.c
+++ b/drivers/hwmon/max31827.c
@@ -12,6 +12,7 @@
 #include <linux/i2c.h>
 #include <linux/mutex.h>
 #include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
 
 #define MAX31827_T_REG			0x0
 #define MAX31827_CONFIGURATION_REG	0x2
diff --git a/drivers/hwmon/nzxt-kraken2.c b/drivers/hwmon/nzxt-kraken2.c
index 428c77b5fce5..7caf387eb144 100644
--- a/drivers/hwmon/nzxt-kraken2.c
+++ b/drivers/hwmon/nzxt-kraken2.c
@@ -161,13 +161,13 @@ static int kraken2_probe(struct hid_device *hdev,
 	ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW);
 	if (ret) {
 		hid_err(hdev, "hid hw start failed with %d\n", ret);
-		goto fail_and_stop;
+		return ret;
 	}
 
 	ret = hid_hw_open(hdev);
 	if (ret) {
 		hid_err(hdev, "hid hw open failed with %d\n", ret);
-		goto fail_and_close;
+		goto fail_and_stop;
 	}
 
 	priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, "kraken2",
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 5ca6278baff4..89e8ed214ea4 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -493,7 +493,7 @@ static void etm_event_start(struct perf_event *event, int flags)
 		goto fail_end_stop;
 
 	/* Finally enable the tracer */
-	if (coresight_enable_source(csdev, CS_MODE_PERF, event))
+	if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
 		goto fail_disable_path;
 
 	/*
@@ -587,7 +587,7 @@ static void etm_event_stop(struct perf_event *event, int mode)
 		return;
 
 	/* stop tracer */
-	coresight_disable_source(csdev, event);
+	source_ops(csdev)->disable(csdev, event);
 
 	/* tell the core */
 	event->hw.state = PERF_HES_STOPPED;
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 77b0271ce6eb..34aee59dd147 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -2224,7 +2224,7 @@ static void clear_etmdrvdata(void *info)
 	per_cpu(delayed_probe, cpu) = NULL;
 }
 
-static void __exit etm4_remove_dev(struct etmv4_drvdata *drvdata)
+static void etm4_remove_dev(struct etmv4_drvdata *drvdata)
 {
 	bool had_delayed_probe;
 	/*
@@ -2253,7 +2253,7 @@ static void __exit etm4_remove_dev(struct etmv4_drvdata *drvdata)
 	}
 }
 
-static void __exit etm4_remove_amba(struct amba_device *adev)
+static void etm4_remove_amba(struct amba_device *adev)
 {
 	struct etmv4_drvdata *drvdata = dev_get_drvdata(&adev->dev);
 
@@ -2261,7 +2261,7 @@ static void __exit etm4_remove_amba(struct amba_device *adev)
 		etm4_remove_dev(drvdata);
 }
 
-static int __exit etm4_remove_platform_dev(struct platform_device *pdev)
+static int etm4_remove_platform_dev(struct platform_device *pdev)
 {
 	struct etmv4_drvdata *drvdata = dev_get_drvdata(&pdev->dev);
 
diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.c b/drivers/hwtracing/coresight/ultrasoc-smb.c
index e9a32a97fbee..6e32d31a95fe 100644
--- a/drivers/hwtracing/coresight/ultrasoc-smb.c
+++ b/drivers/hwtracing/coresight/ultrasoc-smb.c
@@ -99,7 +99,7 @@ static int smb_open(struct inode *inode, struct file *file)
 					struct smb_drv_data, miscdev);
 	int ret = 0;
 
-	mutex_lock(&drvdata->mutex);
+	spin_lock(&drvdata->spinlock);
 
 	if (drvdata->reading) {
 		ret = -EBUSY;
@@ -115,7 +115,7 @@ static int smb_open(struct inode *inode, struct file *file)
 
 	drvdata->reading = true;
 out:
-	mutex_unlock(&drvdata->mutex);
+	spin_unlock(&drvdata->spinlock);
 
 	return ret;
 }
@@ -132,10 +132,8 @@ static ssize_t smb_read(struct file *file, char __user *data, size_t len,
 	if (!len)
 		return 0;
 
-	mutex_lock(&drvdata->mutex);
-
 	if (!sdb->data_size)
-		goto out;
+		return 0;
 
 	to_copy = min(sdb->data_size, len);
 
@@ -145,20 +143,15 @@ static ssize_t smb_read(struct file *file, char __user *data, size_t len,
 
 	if (copy_to_user(data, sdb->buf_base + sdb->buf_rdptr, to_copy)) {
 		dev_dbg(dev, "Failed to copy data to user\n");
-		to_copy = -EFAULT;
-		goto out;
+		return -EFAULT;
 	}
 
 	*ppos += to_copy;
-
 	smb_update_read_ptr(drvdata, to_copy);
-
-	dev_dbg(dev, "%zu bytes copied\n", to_copy);
-out:
 	if (!sdb->data_size)
 		smb_reset_buffer(drvdata);
-	mutex_unlock(&drvdata->mutex);
 
+	dev_dbg(dev, "%zu bytes copied\n", to_copy);
 	return to_copy;
 }
 
@@ -167,9 +160,9 @@ static int smb_release(struct inode *inode, struct file *file)
 	struct smb_drv_data *drvdata = container_of(file->private_data,
 					struct smb_drv_data, miscdev);
 
-	mutex_lock(&drvdata->mutex);
+	spin_lock(&drvdata->spinlock);
 	drvdata->reading = false;
-	mutex_unlock(&drvdata->mutex);
+	spin_unlock(&drvdata->spinlock);
 
 	return 0;
 }
@@ -262,7 +255,7 @@ static int smb_enable(struct coresight_device *csdev, enum cs_mode mode,
 	struct smb_drv_data *drvdata = dev_get_drvdata(csdev->dev.parent);
 	int ret = 0;
 
-	mutex_lock(&drvdata->mutex);
+	spin_lock(&drvdata->spinlock);
 
 	/* Do nothing, the trace data is reading by other interface now */
 	if (drvdata->reading) {
@@ -294,7 +287,7 @@ static int smb_enable(struct coresight_device *csdev, enum cs_mode mode,
 
 	dev_dbg(&csdev->dev, "Ultrasoc SMB enabled\n");
 out:
-	mutex_unlock(&drvdata->mutex);
+	spin_unlock(&drvdata->spinlock);
 
 	return ret;
 }
@@ -304,7 +297,7 @@ static int smb_disable(struct coresight_device *csdev)
 	struct smb_drv_data *drvdata = dev_get_drvdata(csdev->dev.parent);
 	int ret = 0;
 
-	mutex_lock(&drvdata->mutex);
+	spin_lock(&drvdata->spinlock);
 
 	if (drvdata->reading) {
 		ret = -EBUSY;
@@ -327,7 +320,7 @@ static int smb_disable(struct coresight_device *csdev)
 
 	dev_dbg(&csdev->dev, "Ultrasoc SMB disabled\n");
 out:
-	mutex_unlock(&drvdata->mutex);
+	spin_unlock(&drvdata->spinlock);
 
 	return ret;
 }
@@ -408,7 +401,7 @@ static unsigned long smb_update_buffer(struct coresight_device *csdev,
 	if (!buf)
 		return 0;
 
-	mutex_lock(&drvdata->mutex);
+	spin_lock(&drvdata->spinlock);
 
 	/* Don't do anything if another tracer is using this sink. */
 	if (atomic_read(&csdev->refcnt) != 1)
@@ -432,7 +425,7 @@ static unsigned long smb_update_buffer(struct coresight_device *csdev,
 	if (!buf->snapshot && lost)
 		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
 out:
-	mutex_unlock(&drvdata->mutex);
+	spin_unlock(&drvdata->spinlock);
 
 	return data_size;
 }
@@ -484,7 +477,6 @@ static int smb_init_data_buffer(struct platform_device *pdev,
 static void smb_init_hw(struct smb_drv_data *drvdata)
 {
 	smb_disable_hw(drvdata);
-	smb_reset_buffer(drvdata);
 
 	writel(SMB_LB_CFG_LO_DEFAULT, drvdata->base + SMB_LB_CFG_LO_REG);
 	writel(SMB_LB_CFG_HI_DEFAULT, drvdata->base + SMB_LB_CFG_HI_REG);
@@ -590,37 +582,33 @@ static int smb_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	mutex_init(&drvdata->mutex);
+	ret = smb_config_inport(dev, true);
+	if (ret)
+		return ret;
+
+	smb_reset_buffer(drvdata);
+	platform_set_drvdata(pdev, drvdata);
+	spin_lock_init(&drvdata->spinlock);
 	drvdata->pid = -1;
 
 	ret = smb_register_sink(pdev, drvdata);
 	if (ret) {
+		smb_config_inport(&pdev->dev, false);
 		dev_err(dev, "Failed to register SMB sink\n");
 		return ret;
 	}
 
-	ret = smb_config_inport(dev, true);
-	if (ret) {
-		smb_unregister_sink(drvdata);
-		return ret;
-	}
-
-	platform_set_drvdata(pdev, drvdata);
-
 	return 0;
 }
 
 static int smb_remove(struct platform_device *pdev)
 {
 	struct smb_drv_data *drvdata = platform_get_drvdata(pdev);
-	int ret;
-
-	ret = smb_config_inport(&pdev->dev, false);
-	if (ret)
-		return ret;
 
 	smb_unregister_sink(drvdata);
 
+	smb_config_inport(&pdev->dev, false);
+
 	return 0;
 }
 
diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.h b/drivers/hwtracing/coresight/ultrasoc-smb.h
index d2e14e8d2c8a..82a44c14a882 100644
--- a/drivers/hwtracing/coresight/ultrasoc-smb.h
+++ b/drivers/hwtracing/coresight/ultrasoc-smb.h
@@ -8,7 +8,7 @@
 #define _ULTRASOC_SMB_H
 
 #include <linux/miscdevice.h>
-#include <linux/mutex.h>
+#include <linux/spinlock.h>
 
 /* Offset of SMB global registers */
 #define SMB_GLB_CFG_REG		0x00
@@ -105,7 +105,7 @@ struct smb_data_buffer {
  * @csdev:	Component vitals needed by the framework.
  * @sdb:	Data buffer for SMB.
  * @miscdev:	Specifics to handle "/dev/xyz.smb" entry.
- * @mutex:	Control data access to one at a time.
+ * @spinlock:	Control data access to one at a time.
  * @reading:	Synchronise user space access to SMB buffer.
  * @pid:	Process ID of the process being monitored by the
  *		session that is using this component.
@@ -116,7 +116,7 @@ struct smb_drv_data {
 	struct coresight_device	*csdev;
 	struct smb_data_buffer sdb;
 	struct miscdevice miscdev;
-	struct mutex mutex;
+	spinlock_t spinlock;
 	bool reading;
 	pid_t pid;
 	enum cs_mode mode;
diff --git a/drivers/hwtracing/ptt/hisi_ptt.c b/drivers/hwtracing/ptt/hisi_ptt.c
index 49ea1b0f7489..a991ecb7515a 100644
--- a/drivers/hwtracing/ptt/hisi_ptt.c
+++ b/drivers/hwtracing/ptt/hisi_ptt.c
@@ -342,9 +342,9 @@ static int hisi_ptt_register_irq(struct hisi_ptt *hisi_ptt)
 		return ret;
 
 	hisi_ptt->trace_irq = pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ);
-	ret = devm_request_threaded_irq(&pdev->dev, hisi_ptt->trace_irq,
-					NULL, hisi_ptt_isr, 0,
-					DRV_NAME, hisi_ptt);
+	ret = devm_request_irq(&pdev->dev, hisi_ptt->trace_irq, hisi_ptt_isr,
+				IRQF_NOBALANCING | IRQF_NO_THREAD, DRV_NAME,
+				hisi_ptt);
 	if (ret) {
 		pci_err(pdev, "failed to request irq %d, ret = %d\n",
 			hisi_ptt->trace_irq, ret);
@@ -1000,6 +1000,9 @@ static int hisi_ptt_pmu_event_init(struct perf_event *event)
 		return -EOPNOTSUPP;
 	}
 
+	if (event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
 	if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type)
 		return -ENOENT;
 
@@ -1178,6 +1181,10 @@ static void hisi_ptt_pmu_del(struct perf_event *event, int flags)
 	hisi_ptt_pmu_stop(event, PERF_EF_UPDATE);
 }
 
+static void hisi_ptt_pmu_read(struct perf_event *event)
+{
+}
+
 static void hisi_ptt_remove_cpuhp_instance(void *hotplug_node)
 {
 	cpuhp_state_remove_instance_nocalls(hisi_ptt_pmu_online, hotplug_node);
@@ -1221,6 +1228,7 @@ static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt)
 		.stop		= hisi_ptt_pmu_stop,
 		.add		= hisi_ptt_pmu_add,
 		.del		= hisi_ptt_pmu_del,
+		.read		= hisi_ptt_pmu_read,
 	};
 
 	reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index f9ab671c8eda..07c571c7b699 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -96,12 +96,6 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 		return page_size;
 	}
 
-	/* rdma_for_each_block() has a bug if the page size is smaller than the
-	 * page size used to build the umem. For now prevent smaller page sizes
-	 * from being returned.
-	 */
-	pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
-
 	/* The best result is the smallest page size that results in the minimum
 	 * number of required pages. Compute the largest page size that could
 	 * work based on VA address bits that don't change.
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 8a6da87f464b..94a7f3b0c71c 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1971,7 +1971,7 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
 	int rc;
 	u32 netdev_speed;
 	struct net_device *netdev;
-	struct ethtool_link_ksettings lksettings;
+	struct ethtool_link_ksettings lksettings = {};
 
 	if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
 		return -EINVAL;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index f79369c8360a..a99c68247af0 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -71,7 +71,7 @@ static char version[] =
 		BNXT_RE_DESC "\n";
 
 MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
-MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
+MODULE_DESCRIPTION(BNXT_RE_DESC);
 MODULE_LICENSE("Dual BSD/GPL");
 
 /* globals */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 0cd2612a4987..2bca9560f32d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -4760,10 +4760,15 @@ static int check_cong_type(struct ib_qp *ibqp,
 		cong_alg->wnd_mode_sel = WND_LIMIT;
 		break;
 	default:
-		ibdev_err(&hr_dev->ib_dev,
-			  "error type(%u) for congestion selection.\n",
-			  hr_dev->caps.cong_type);
-		return -EINVAL;
+		ibdev_warn(&hr_dev->ib_dev,
+			   "invalid type(%u) for congestion selection.\n",
+			   hr_dev->caps.cong_type);
+		hr_dev->caps.cong_type = CONG_TYPE_DCQCN;
+		cong_alg->alg_sel = CONG_DCQCN;
+		cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
+		cong_alg->dip_vld = DIP_INVALID;
+		cong_alg->wnd_mode_sel = WND_LIMIT;
+		break;
 	}
 
 	return 0;
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index 8fa7e4a18e73..bd4b2b896444 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -321,7 +321,11 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
 			break;
 		case IRDMA_AE_QP_SUSPEND_COMPLETE:
 			if (iwqp->iwdev->vsi.tc_change_pending) {
-				atomic_dec(&iwqp->sc_qp.vsi->qp_suspend_reqs);
+				if (!atomic_dec_return(&qp->vsi->qp_suspend_reqs))
+					wake_up(&iwqp->iwdev->suspend_wq);
+			}
+			if (iwqp->suspend_pending) {
+				iwqp->suspend_pending = false;
 				wake_up(&iwqp->iwdev->suspend_wq);
 			}
 			break;
@@ -581,9 +585,6 @@ static void irdma_destroy_cqp(struct irdma_pci_f *rf)
 	struct irdma_cqp *cqp = &rf->cqp;
 	int status = 0;
 
-	if (rf->cqp_cmpl_wq)
-		destroy_workqueue(rf->cqp_cmpl_wq);
-
 	status = irdma_sc_cqp_destroy(dev->cqp);
 	if (status)
 		ibdev_dbg(to_ibdev(dev), "ERR: Destroy CQP failed %d\n", status);
@@ -748,6 +749,9 @@ static void irdma_destroy_ccq(struct irdma_pci_f *rf)
 	struct irdma_ccq *ccq = &rf->ccq;
 	int status = 0;
 
+	if (rf->cqp_cmpl_wq)
+		destroy_workqueue(rf->cqp_cmpl_wq);
+
 	if (!rf->reset)
 		status = irdma_sc_ccq_destroy(dev->ccq, 0, true);
 	if (status)
@@ -1180,7 +1184,6 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
 	int status;
 	struct irdma_ceq_init_info info = {};
 	struct irdma_sc_dev *dev = &rf->sc_dev;
-	u64 scratch;
 	u32 ceq_size;
 
 	info.ceq_id = ceq_id;
@@ -1201,14 +1204,13 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
 	iwceq->sc_ceq.ceq_id = ceq_id;
 	info.dev = dev;
 	info.vsi = vsi;
-	scratch = (uintptr_t)&rf->cqp.sc_cqp;
 	status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info);
 	if (!status) {
 		if (dev->ceq_valid)
 			status = irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq,
 						   IRDMA_OP_CEQ_CREATE);
 		else
-			status = irdma_sc_cceq_create(&iwceq->sc_ceq, scratch);
+			status = irdma_sc_cceq_create(&iwceq->sc_ceq, 0);
 	}
 
 	if (status) {
diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
index 9ac48b4dab41..3f13200ff71b 100644
--- a/drivers/infiniband/hw/irdma/main.c
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -48,7 +48,7 @@ static void irdma_prep_tc_change(struct irdma_device *iwdev)
 	/* Wait for all qp's to suspend */
 	wait_event_timeout(iwdev->suspend_wq,
 			   !atomic_read(&iwdev->vsi.qp_suspend_reqs),
-			   IRDMA_EVENT_TIMEOUT);
+			   msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS));
 	irdma_ws_reset(&iwdev->vsi);
 }
 
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index d66d87bb8bc4..b65bc2ea542f 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -78,7 +78,7 @@ extern struct auxiliary_driver i40iw_auxiliary_drv;
 
 #define MAX_DPC_ITERATIONS	128
 
-#define IRDMA_EVENT_TIMEOUT		50000
+#define IRDMA_EVENT_TIMEOUT_MS		5000
 #define IRDMA_VCHNL_EVENT_TIMEOUT	100000
 #define IRDMA_RST_TIMEOUT_HZ		4
 
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 2138f0a2ff85..b5eb8d421988 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -1157,6 +1157,21 @@ exit:
 	return prio;
 }
 
+static int irdma_wait_for_suspend(struct irdma_qp *iwqp)
+{
+	if (!wait_event_timeout(iwqp->iwdev->suspend_wq,
+				!iwqp->suspend_pending,
+				msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS))) {
+		iwqp->suspend_pending = false;
+		ibdev_warn(&iwqp->iwdev->ibdev,
+			   "modify_qp timed out waiting for suspend. qp_id = %d, last_ae = 0x%x\n",
+			   iwqp->ibqp.qp_num, iwqp->last_aeq);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
 /**
  * irdma_modify_qp_roce - modify qp request
  * @ibqp: qp's pointer for modify
@@ -1420,17 +1435,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 			info.next_iwarp_state = IRDMA_QP_STATE_SQD;
 			issue_modify_qp = 1;
+			iwqp->suspend_pending = true;
 			break;
 		case IB_QPS_SQE:
 		case IB_QPS_ERR:
 		case IB_QPS_RESET:
-			if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) {
-				spin_unlock_irqrestore(&iwqp->lock, flags);
-				info.next_iwarp_state = IRDMA_QP_STATE_SQD;
-				irdma_hw_modify_qp(iwdev, iwqp, &info, true);
-				spin_lock_irqsave(&iwqp->lock, flags);
-			}
-
 			if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
 				spin_unlock_irqrestore(&iwqp->lock, flags);
 				if (udata && udata->inlen) {
@@ -1467,6 +1476,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 			ctx_info->rem_endpoint_idx = udp_info->arp_idx;
 			if (irdma_hw_modify_qp(iwdev, iwqp, &info, true))
 				return -EINVAL;
+			if (info.next_iwarp_state == IRDMA_QP_STATE_SQD) {
+				ret = irdma_wait_for_suspend(iwqp);
+				if (ret)
+					return ret;
+			}
 			spin_lock_irqsave(&iwqp->lock, flags);
 			if (iwqp->iwarp_state == info.curr_iwarp_state) {
 				iwqp->iwarp_state = info.next_iwarp_state;
@@ -2900,7 +2914,7 @@ static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region,
 	iwmr->type = reg_type;
 
 	pgsz_bitmap = (reg_type == IRDMA_MEMREG_TYPE_MEM) ?
-		iwdev->rf->sc_dev.hw_attrs.page_size_cap : PAGE_SIZE;
+		iwdev->rf->sc_dev.hw_attrs.page_size_cap : SZ_4K;
 
 	iwmr->page_size = ib_umem_find_best_pgsz(region, pgsz_bitmap, virt);
 	if (unlikely(!iwmr->page_size)) {
@@ -2932,6 +2946,11 @@ static int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req,
 	int err;
 	u8 lvl;
 
+	/* iWarp: Catch page not starting on OS page boundary */
+	if (!rdma_protocol_roce(&iwdev->ibdev, 1) &&
+	    ib_umem_offset(iwmr->region))
+		return -EINVAL;
+
 	total = req.sq_pages + req.rq_pages + 1;
 	if (total > iwmr->page_cnt)
 		return -EINVAL;
diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h
index c42ac22de00e..cfa140b36395 100644
--- a/drivers/infiniband/hw/irdma/verbs.h
+++ b/drivers/infiniband/hw/irdma/verbs.h
@@ -198,6 +198,7 @@ struct irdma_qp {
 	u8 flush_issued : 1;
 	u8 sig_all : 1;
 	u8 pau_mode : 1;
+	u8 suspend_pending : 1;
 	u8 rsvd : 1;
 	u8 iwarp_state;
 	u16 term_sq_flush_code;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 07261523c554..7f3167ce2972 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -384,7 +384,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
 	struct rtrs_clt_path *clt_path;
 	int err;
 
-	if (WARN_ON(!req->in_use))
+	if (!req->in_use)
 		return;
 	if (WARN_ON(!req->con))
 		return;
@@ -1699,7 +1699,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
 		clt_path->s.dev_ref++;
 		max_send_wr = min_t(int, wr_limit,
 			      /* QD * (REQ + RSP + FR REGS or INVS) + drain */
-			      clt_path->queue_depth * 3 + 1);
+			      clt_path->queue_depth * 4 + 1);
 		max_recv_wr = min_t(int, wr_limit,
 			      clt_path->queue_depth * 3 + 1);
 		max_send_sge = 2;
@@ -2350,8 +2350,6 @@ static int init_conns(struct rtrs_clt_path *clt_path)
 	if (err)
 		goto destroy;
 
-	rtrs_start_hb(&clt_path->s);
-
 	return 0;
 
 destroy:
@@ -2625,6 +2623,7 @@ static int init_path(struct rtrs_clt_path *clt_path)
 		goto out;
 	}
 	rtrs_clt_path_up(clt_path);
+	rtrs_start_hb(&clt_path->s);
 out:
 	mutex_unlock(&clt_path->init_mutex);
 
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index 75e56604e462..1d33efb8fb03 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -65,8 +65,9 @@ static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path,
 {
 	enum rtrs_srv_state old_state;
 	bool changed = false;
+	unsigned long flags;
 
-	spin_lock_irq(&srv_path->state_lock);
+	spin_lock_irqsave(&srv_path->state_lock, flags);
 	old_state = srv_path->state;
 	switch (new_state) {
 	case RTRS_SRV_CONNECTED:
@@ -87,7 +88,7 @@ static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path,
 	}
 	if (changed)
 		srv_path->state = new_state;
-	spin_unlock_irq(&srv_path->state_lock);
+	spin_unlock_irqrestore(&srv_path->state_lock, flags);
 
 	return changed;
 }
@@ -550,7 +551,10 @@ static void unmap_cont_bufs(struct rtrs_srv_path *srv_path)
 		struct rtrs_srv_mr *srv_mr;
 
 		srv_mr = &srv_path->mrs[i];
-		rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1);
+
+		if (always_invalidate)
+			rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1);
+
 		ib_dereg_mr(srv_mr->mr);
 		ib_dma_unmap_sg(srv_path->s.dev->ib_dev, srv_mr->sgt.sgl,
 				srv_mr->sgt.nents, DMA_BIDIRECTIONAL);
@@ -709,20 +713,23 @@ static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
 	WARN_ON(wc->opcode != IB_WC_SEND);
 }
 
-static void rtrs_srv_path_up(struct rtrs_srv_path *srv_path)
+static int rtrs_srv_path_up(struct rtrs_srv_path *srv_path)
 {
 	struct rtrs_srv_sess *srv = srv_path->srv;
 	struct rtrs_srv_ctx *ctx = srv->ctx;
-	int up;
+	int up, ret = 0;
 
 	mutex_lock(&srv->paths_ev_mutex);
 	up = ++srv->paths_up;
 	if (up == 1)
-		ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL);
+		ret = ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL);
 	mutex_unlock(&srv->paths_ev_mutex);
 
 	/* Mark session as established */
-	srv_path->established = true;
+	if (!ret)
+		srv_path->established = true;
+
+	return ret;
 }
 
 static void rtrs_srv_path_down(struct rtrs_srv_path *srv_path)
@@ -851,7 +858,12 @@ static int process_info_req(struct rtrs_srv_con *con,
 		goto iu_free;
 	kobject_get(&srv_path->kobj);
 	get_device(&srv_path->srv->dev);
-	rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED);
+	err = rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED);
+	if (!err) {
+		rtrs_err(s, "rtrs_srv_change_state(), err: %d\n", err);
+		goto iu_free;
+	}
+
 	rtrs_srv_start_hb(srv_path);
 
 	/*
@@ -860,7 +872,11 @@ static int process_info_req(struct rtrs_srv_con *con,
 	 * all connections are successfully established.  Thus, simply notify
 	 * listener with a proper event if we are the first path.
 	 */
-	rtrs_srv_path_up(srv_path);
+	err = rtrs_srv_path_up(srv_path);
+	if (err) {
+		rtrs_err(s, "rtrs_srv_path_up(), err: %d\n", err);
+		goto iu_free;
+	}
 
 	ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev,
 				      tx_iu->dma_addr,
@@ -1516,7 +1532,6 @@ static void rtrs_srv_close_work(struct work_struct *work)
 
 	srv_path = container_of(work, typeof(*srv_path), close_work);
 
-	rtrs_srv_destroy_path_files(srv_path);
 	rtrs_srv_stop_hb(srv_path);
 
 	for (i = 0; i < srv_path->s.con_num; i++) {
@@ -1536,6 +1551,8 @@ static void rtrs_srv_close_work(struct work_struct *work)
 	/* Wait for all completion */
 	wait_for_completion(&srv_path->complete_done);
 
+	rtrs_srv_destroy_path_files(srv_path);
+
 	/* Notify upper layer if we are the last path */
 	rtrs_srv_path_down(srv_path);
 
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index a3414afe11b0..23cb80d62a9a 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1522,6 +1522,15 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
 {
 	struct qi_desc desc;
 
+	/*
+	 * VT-d spec, section 4.3:
+	 *
+	 * Software is recommended to not submit any Device-TLB invalidation
+	 * requests while address remapping hardware is disabled.
+	 */
+	if (!(iommu->gcmd & DMA_GCMD_TE))
+		return;
+
 	if (mask) {
 		addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
@@ -1587,6 +1596,15 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
 	unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
 
+	/*
+	 * VT-d spec, section 4.3:
+	 *
+	 * Software is recommended to not submit any Device-TLB invalidation
+	 * requests while address remapping hardware is disabled.
+	 */
+	if (!(iommu->gcmd & DMA_GCMD_TE))
+		return;
+
 	desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
 		QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
 		QI_DEV_IOTLB_PFSID(pfsid);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 3531b956556c..897159dba47d 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -299,7 +299,7 @@ static int iommu_skip_te_disable;
 #define IDENTMAP_AZALIA		4
 
 const struct iommu_ops intel_iommu_ops;
-const struct iommu_dirty_ops intel_dirty_ops;
+static const struct iommu_dirty_ops intel_dirty_ops;
 
 static bool translation_pre_enabled(struct intel_iommu *iommu)
 {
@@ -2207,6 +2207,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			attr |= DMA_FL_PTE_DIRTY;
 	}
 
+	domain->has_mappings = true;
+
 	pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
 
 	while (nr_pages > 0) {
@@ -2490,7 +2492,8 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
 		return ret;
 	}
 
-	iommu_enable_pci_caps(info);
+	if (sm_supported(info->iommu) || !domain_type_is_si(info->domain))
+		iommu_enable_pci_caps(info);
 
 	return 0;
 }
@@ -3925,8 +3928,8 @@ static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *op
  */
 static void domain_context_clear(struct device_domain_info *info)
 {
-	if (!info->iommu || !info->dev || !dev_is_pci(info->dev))
-		return;
+	if (!dev_is_pci(info->dev))
+		domain_context_clear_one(info, info->bus, info->devfn);
 
 	pci_for_each_dma_alias(to_pci_dev(info->dev),
 			       &domain_context_clear_one_cb, info);
@@ -4360,7 +4363,8 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
 		return true;
 
 	spin_lock_irqsave(&dmar_domain->lock, flags);
-	if (!domain_support_force_snooping(dmar_domain)) {
+	if (!domain_support_force_snooping(dmar_domain) ||
+	    (!dmar_domain->use_first_level && dmar_domain->has_mappings)) {
 		spin_unlock_irqrestore(&dmar_domain->lock, flags);
 		return false;
 	}
@@ -4925,7 +4929,7 @@ static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
 	return 0;
 }
 
-const struct iommu_dirty_ops intel_dirty_ops = {
+static const struct iommu_dirty_ops intel_dirty_ops = {
 	.set_dirty_tracking = intel_iommu_set_dirty_tracking,
 	.read_and_clear_dirty = intel_iommu_read_and_clear_dirty,
 };
@@ -5073,7 +5077,7 @@ static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
 	ver = (dev->device >> 8) & 0xff;
 	if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
 	    ver != 0x4e && ver != 0x8a && ver != 0x98 &&
-	    ver != 0x9a && ver != 0xa7)
+	    ver != 0x9a && ver != 0xa7 && ver != 0x7d)
 		return;
 
 	if (risky_device(dev))
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 65d37a138c75..ce030c5b5772 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -602,6 +602,9 @@ struct dmar_domain {
 					 */
 	u8 dirty_tracking:1;		/* Dirty tracking is enabled */
 	u8 nested_parent:1;		/* Has other domains nested on it */
+	u8 has_mappings:1;		/* Has mappings configured through
+					 * iommu_map() interface.
+					 */
 
 	spinlock_t lock;		/* Protect device tracking lists */
 	struct list_head devices;	/* all devices' list */
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 50a481c895b8..ac12f76c1212 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -216,6 +216,27 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
 	rcu_read_unlock();
 }
 
+static void intel_flush_svm_all(struct intel_svm *svm)
+{
+	struct device_domain_info *info;
+	struct intel_svm_dev *sdev;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sdev, &svm->devs, list) {
+		info = dev_iommu_priv_get(sdev->dev);
+
+		qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0);
+		if (info->ats_enabled) {
+			qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
+						 svm->pasid, sdev->qdep,
+						 0, 64 - VTD_PAGE_SHIFT);
+			quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT,
+						  svm->pasid, sdev->qdep);
+		}
+	}
+	rcu_read_unlock();
+}
+
 /* Pages have been freed at this point */
 static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
 					struct mm_struct *mm,
@@ -223,6 +244,11 @@ static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
 {
 	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
 
+	if (start == 0 && end == -1UL) {
+		intel_flush_svm_all(svm);
+		return;
+	}
+
 	intel_flush_svm_range(svm, start,
 			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
 }
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f17a1113f3d6..33e2a9b5d339 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -485,11 +485,12 @@ static void iommu_deinit_device(struct device *dev)
 	dev_iommu_free(dev);
 }
 
+DEFINE_MUTEX(iommu_probe_device_lock);
+
 static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
 {
 	const struct iommu_ops *ops = dev->bus->iommu_ops;
 	struct iommu_group *group;
-	static DEFINE_MUTEX(iommu_probe_device_lock);
 	struct group_device *gdev;
 	int ret;
 
@@ -502,17 +503,15 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
 	 * probably be able to use device_lock() here to minimise the scope,
 	 * but for now enforcing a simple global ordering is fine.
 	 */
-	mutex_lock(&iommu_probe_device_lock);
+	lockdep_assert_held(&iommu_probe_device_lock);
 
 	/* Device is probed already if in a group */
-	if (dev->iommu_group) {
-		ret = 0;
-		goto out_unlock;
-	}
+	if (dev->iommu_group)
+		return 0;
 
 	ret = iommu_init_device(dev, ops);
 	if (ret)
-		goto out_unlock;
+		return ret;
 
 	group = dev->iommu_group;
 	gdev = iommu_group_alloc_device(group, dev);
@@ -548,7 +547,6 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
 			list_add_tail(&group->entry, group_list);
 	}
 	mutex_unlock(&group->mutex);
-	mutex_unlock(&iommu_probe_device_lock);
 
 	if (dev_is_pci(dev))
 		iommu_dma_set_pci_32bit_workaround(dev);
@@ -562,8 +560,6 @@ err_put_group:
 	iommu_deinit_device(dev);
 	mutex_unlock(&group->mutex);
 	iommu_group_put(group);
-out_unlock:
-	mutex_unlock(&iommu_probe_device_lock);
 
 	return ret;
 }
@@ -573,7 +569,9 @@ int iommu_probe_device(struct device *dev)
 	const struct iommu_ops *ops;
 	int ret;
 
+	mutex_lock(&iommu_probe_device_lock);
 	ret = __iommu_probe_device(dev, NULL);
+	mutex_unlock(&iommu_probe_device_lock);
 	if (ret)
 		return ret;
 
@@ -1788,7 +1786,7 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
 	 */
 	if (ops->default_domain) {
 		if (req_type)
-			return NULL;
+			return ERR_PTR(-EINVAL);
 		return ops->default_domain;
 	}
 
@@ -1797,15 +1795,15 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
 
 	/* The driver gave no guidance on what type to use, try the default */
 	dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type);
-	if (dom)
+	if (!IS_ERR(dom))
 		return dom;
 
 	/* Otherwise IDENTITY and DMA_FQ defaults will try DMA */
 	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA)
-		return NULL;
+		return ERR_PTR(-EINVAL);
 	dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA);
-	if (!dom)
-		return NULL;
+	if (IS_ERR(dom))
+		return dom;
 
 	pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
 		iommu_def_domain_type, group->name);
@@ -1822,7 +1820,9 @@ static int probe_iommu_group(struct device *dev, void *data)
 	struct list_head *group_list = data;
 	int ret;
 
+	mutex_lock(&iommu_probe_device_lock);
 	ret = __iommu_probe_device(dev, group_list);
+	mutex_unlock(&iommu_probe_device_lock);
 	if (ret == -ENODEV)
 		ret = 0;
 
@@ -2094,10 +2094,17 @@ static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops,
 	else if (ops->domain_alloc)
 		domain = ops->domain_alloc(alloc_type);
 	else
-		return NULL;
+		return ERR_PTR(-EOPNOTSUPP);
 
+	/*
+	 * Many domain_alloc ops now return ERR_PTR, make things easier for the
+	 * driver by accepting ERR_PTR from all domain_alloc ops instead of
+	 * having two rules.
+	 */
+	if (IS_ERR(domain))
+		return domain;
 	if (!domain)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	domain->type = type;
 	/*
@@ -2110,9 +2117,14 @@ static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops,
 	if (!domain->ops)
 		domain->ops = ops->default_domain_ops;
 
-	if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) {
-		iommu_domain_free(domain);
-		domain = NULL;
+	if (iommu_is_dma_domain(domain)) {
+		int rc;
+
+		rc = iommu_get_dma_cookie(domain);
+		if (rc) {
+			iommu_domain_free(domain);
+			return ERR_PTR(rc);
+		}
 	}
 	return domain;
 }
@@ -2129,10 +2141,15 @@ __iommu_group_domain_alloc(struct iommu_group *group, unsigned int type)
 
 struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
 {
+	struct iommu_domain *domain;
+
 	if (bus == NULL || bus->iommu_ops == NULL)
 		return NULL;
-	return __iommu_domain_alloc(bus->iommu_ops, NULL,
+	domain = __iommu_domain_alloc(bus->iommu_ops, NULL,
 				    IOMMU_DOMAIN_UNMANAGED);
+	if (IS_ERR(domain))
+		return NULL;
+	return domain;
 }
 EXPORT_SYMBOL_GPL(iommu_domain_alloc);
 
@@ -3041,8 +3058,8 @@ static int iommu_setup_default_domain(struct iommu_group *group,
 		return -EINVAL;
 
 	dom = iommu_group_alloc_default_domain(group, req_type);
-	if (!dom)
-		return -ENODEV;
+	if (IS_ERR(dom))
+		return PTR_ERR(dom);
 
 	if (group->default_domain == dom)
 		return 0;
@@ -3243,21 +3260,23 @@ void iommu_device_unuse_default_domain(struct device *dev)
 
 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
 {
+	struct iommu_domain *domain;
+
 	if (group->blocking_domain)
 		return 0;
 
-	group->blocking_domain =
-		__iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED);
-	if (!group->blocking_domain) {
+	domain = __iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED);
+	if (IS_ERR(domain)) {
 		/*
 		 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED
 		 * create an empty domain instead.
 		 */
-		group->blocking_domain = __iommu_group_domain_alloc(
-			group, IOMMU_DOMAIN_UNMANAGED);
-		if (!group->blocking_domain)
-			return -EINVAL;
+		domain = __iommu_group_domain_alloc(group,
+						    IOMMU_DOMAIN_UNMANAGED);
+		if (IS_ERR(domain))
+			return PTR_ERR(domain);
 	}
+	group->blocking_domain = domain;
 	return 0;
 }
 
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 59d3a07300d9..873630c111c1 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -571,7 +571,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
 			continue;
 		destroy_hwpt = (*do_attach)(idev, hwpt);
 		if (IS_ERR(destroy_hwpt)) {
-			iommufd_put_object(&hwpt->obj);
+			iommufd_put_object(idev->ictx, &hwpt->obj);
 			/*
 			 * -EINVAL means the domain is incompatible with the
 			 * device. Other error codes should propagate to
@@ -583,7 +583,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
 			goto out_unlock;
 		}
 		*pt_id = hwpt->obj.id;
-		iommufd_put_object(&hwpt->obj);
+		iommufd_put_object(idev->ictx, &hwpt->obj);
 		goto out_unlock;
 	}
 
@@ -652,7 +652,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
 		destroy_hwpt = ERR_PTR(-EINVAL);
 		goto out_put_pt_obj;
 	}
-	iommufd_put_object(pt_obj);
+	iommufd_put_object(idev->ictx, pt_obj);
 
 	/* This destruction has to be after we unlock everything */
 	if (destroy_hwpt)
@@ -660,7 +660,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
 	return 0;
 
 out_put_pt_obj:
-	iommufd_put_object(pt_obj);
+	iommufd_put_object(idev->ictx, pt_obj);
 	return PTR_ERR(destroy_hwpt);
 }
 
@@ -792,7 +792,7 @@ static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
 	if (IS_ERR(ioas))
 		return PTR_ERR(ioas);
 	rc = iommufd_access_change_ioas(access, ioas);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(access->ictx, &ioas->obj);
 	return rc;
 }
 
@@ -941,7 +941,7 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
 
 		access->ops->unmap(access->data, iova, length);
 
-		iommufd_put_object(&access->obj);
+		iommufd_put_object(access->ictx, &access->obj);
 		xa_lock(&ioas->iopt.access_list);
 	}
 	xa_unlock(&ioas->iopt.access_list);
@@ -1243,6 +1243,6 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
 out_free:
 	kfree(data);
 out_put:
-	iommufd_put_object(&idev->obj);
+	iommufd_put_object(ucmd->ictx, &idev->obj);
 	return rc;
 }
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 2abbeafdbd22..cbb5df0a6c32 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -318,9 +318,9 @@ out_unlock:
 	if (ioas)
 		mutex_unlock(&ioas->mutex);
 out_put_pt:
-	iommufd_put_object(pt_obj);
+	iommufd_put_object(ucmd->ictx, pt_obj);
 out_put_idev:
-	iommufd_put_object(&idev->obj);
+	iommufd_put_object(ucmd->ictx, &idev->obj);
 	return rc;
 }
 
@@ -345,7 +345,7 @@ int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd)
 	rc = iopt_set_dirty_tracking(&ioas->iopt, hwpt_paging->common.domain,
 				     enable);
 
-	iommufd_put_object(&hwpt_paging->common.obj);
+	iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
 	return rc;
 }
 
@@ -368,6 +368,6 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd)
 	rc = iopt_read_and_clear_dirty_data(
 		&ioas->iopt, hwpt_paging->common.domain, cmd->flags, cmd);
 
-	iommufd_put_object(&hwpt_paging->common.obj);
+	iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
 	return rc;
 }
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
index d5624577f79f..742248276548 100644
--- a/drivers/iommu/iommufd/ioas.c
+++ b/drivers/iommu/iommufd/ioas.c
@@ -105,7 +105,7 @@ int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd)
 		rc = -EMSGSIZE;
 out_put:
 	up_read(&ioas->iopt.iova_rwsem);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 	return rc;
 }
 
@@ -175,7 +175,7 @@ out_free:
 		interval_tree_remove(node, &allowed_iova);
 		kfree(container_of(node, struct iopt_allowed, node));
 	}
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 	return rc;
 }
 
@@ -228,7 +228,7 @@ int iommufd_ioas_map(struct iommufd_ucmd *ucmd)
 	cmd->iova = iova;
 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 out_put:
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 	return rc;
 }
 
@@ -258,7 +258,7 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd)
 		return PTR_ERR(src_ioas);
 	rc = iopt_get_pages(&src_ioas->iopt, cmd->src_iova, cmd->length,
 			    &pages_list);
-	iommufd_put_object(&src_ioas->obj);
+	iommufd_put_object(ucmd->ictx, &src_ioas->obj);
 	if (rc)
 		return rc;
 
@@ -279,7 +279,7 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd)
 	cmd->dst_iova = iova;
 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 out_put_dst:
-	iommufd_put_object(&dst_ioas->obj);
+	iommufd_put_object(ucmd->ictx, &dst_ioas->obj);
 out_pages:
 	iopt_free_pages_list(&pages_list);
 	return rc;
@@ -315,7 +315,7 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 
 out_put:
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 	return rc;
 }
 
@@ -393,6 +393,6 @@ int iommufd_ioas_option(struct iommufd_ucmd *ucmd)
 		rc = -EOPNOTSUPP;
 	}
 
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 	return rc;
 }
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index a74cfefffbc6..abae041e256f 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -21,6 +21,7 @@ struct iommufd_ctx {
 	struct file *file;
 	struct xarray objects;
 	struct xarray groups;
+	wait_queue_head_t destroy_wait;
 
 	u8 account_mode;
 	/* Compatibility with VFIO no iommu */
@@ -135,7 +136,7 @@ enum iommufd_object_type {
 
 /* Base struct for all objects with a userspace ID handle. */
 struct iommufd_object {
-	struct rw_semaphore destroy_rwsem;
+	refcount_t shortterm_users;
 	refcount_t users;
 	enum iommufd_object_type type;
 	unsigned int id;
@@ -143,10 +144,15 @@ struct iommufd_object {
 
 static inline bool iommufd_lock_obj(struct iommufd_object *obj)
 {
-	if (!down_read_trylock(&obj->destroy_rwsem))
+	if (!refcount_inc_not_zero(&obj->users))
 		return false;
-	if (!refcount_inc_not_zero(&obj->users)) {
-		up_read(&obj->destroy_rwsem);
+	if (!refcount_inc_not_zero(&obj->shortterm_users)) {
+		/*
+		 * If the caller doesn't already have a ref on obj this must be
+		 * called under the xa_lock. Otherwise the caller is holding a
+		 * ref on users. Thus it cannot be one before this decrement.
+		 */
+		refcount_dec(&obj->users);
 		return false;
 	}
 	return true;
@@ -154,10 +160,16 @@ static inline bool iommufd_lock_obj(struct iommufd_object *obj)
 
 struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
 					  enum iommufd_object_type type);
-static inline void iommufd_put_object(struct iommufd_object *obj)
+static inline void iommufd_put_object(struct iommufd_ctx *ictx,
+				      struct iommufd_object *obj)
 {
+	/*
+	 * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
+	 * a spurious !0 users with a 0 shortterm_users.
+	 */
 	refcount_dec(&obj->users);
-	up_read(&obj->destroy_rwsem);
+	if (refcount_dec_and_test(&obj->shortterm_users))
+		wake_up_interruptible_all(&ictx->destroy_wait);
 }
 
 void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
@@ -165,17 +177,49 @@ void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
 				      struct iommufd_object *obj);
 void iommufd_object_finalize(struct iommufd_ctx *ictx,
 			     struct iommufd_object *obj);
-void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
-				   struct iommufd_object *obj, bool allow_fail);
+
+enum {
+	REMOVE_WAIT_SHORTTERM = 1,
+};
+int iommufd_object_remove(struct iommufd_ctx *ictx,
+			  struct iommufd_object *to_destroy, u32 id,
+			  unsigned int flags);
+
+/*
+ * The caller holds a users refcount and wants to destroy the object. At this
+ * point the caller has no shortterm_users reference and at least the xarray
+ * will be holding one.
+ */
 static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
 					       struct iommufd_object *obj)
 {
-	__iommufd_object_destroy_user(ictx, obj, false);
+	int ret;
+
+	ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);
+
+	/*
+	 * If there is a bug and we couldn't destroy the object then we did put
+	 * back the caller's users refcount and will eventually try to free it
+	 * again during close.
+	 */
+	WARN_ON(ret);
 }
-static inline void iommufd_object_deref_user(struct iommufd_ctx *ictx,
-					     struct iommufd_object *obj)
+
+/*
+ * The HWPT allocated by autodomains is used in possibly many devices and
+ * is automatically destroyed when its refcount reaches zero.
+ *
+ * If userspace uses the HWPT manually, even for a short term, then it will
+ * disrupt this refcounting and the auto-free in the kernel will not work.
+ * Userspace that tries to use the automatically allocated HWPT must be careful
+ * to ensure that it is consistently destroyed, eg by not racing accesses
+ * and by not attaching an automatic HWPT to a device manually.
+ */
+static inline void
+iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
+				   struct iommufd_object *obj)
 {
-	__iommufd_object_destroy_user(ictx, obj, true);
+	iommufd_object_remove(ictx, obj, obj->id, 0);
 }
 
 struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
@@ -311,7 +355,7 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
 		lockdep_assert_not_held(&hwpt_paging->ioas->mutex);
 
 		if (hwpt_paging->auto_domain) {
-			iommufd_object_deref_user(ictx, &hwpt->obj);
+			iommufd_object_put_and_try_destroy(ictx, &hwpt->obj);
 			return;
 		}
 	}
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 45b9d40773b1..c9091e46d208 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -33,7 +33,6 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
 					     size_t size,
 					     enum iommufd_object_type type)
 {
-	static struct lock_class_key obj_keys[IOMMUFD_OBJ_MAX];
 	struct iommufd_object *obj;
 	int rc;
 
@@ -41,15 +40,8 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
 	if (!obj)
 		return ERR_PTR(-ENOMEM);
 	obj->type = type;
-	/*
-	 * In most cases the destroy_rwsem is obtained with try so it doesn't
-	 * interact with lockdep, however on destroy we have to sleep. This
-	 * means if we have to destroy an object while holding a get on another
-	 * object it triggers lockdep. Using one locking class per object type
-	 * is a simple and reasonable way to avoid this.
-	 */
-	__init_rwsem(&obj->destroy_rwsem, "iommufd_object::destroy_rwsem",
-		     &obj_keys[type]);
+	/* Starts out bias'd by 1 until it is removed from the xarray */
+	refcount_set(&obj->shortterm_users, 1);
 	refcount_set(&obj->users, 1);
 
 	/*
@@ -129,92 +121,113 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
 	return obj;
 }
 
+static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx,
+					     struct iommufd_object *to_destroy)
+{
+	if (refcount_dec_and_test(&to_destroy->shortterm_users))
+		return 0;
+
+	if (wait_event_timeout(ictx->destroy_wait,
+				refcount_read(&to_destroy->shortterm_users) ==
+					0,
+				msecs_to_jiffies(10000)))
+		return 0;
+
+	pr_crit("Time out waiting for iommufd object to become free\n");
+	refcount_inc(&to_destroy->shortterm_users);
+	return -EBUSY;
+}
+
 /*
  * Remove the given object id from the xarray if the only reference to the
- * object is held by the xarray. The caller must call ops destroy().
+ * object is held by the xarray.
  */
-static struct iommufd_object *iommufd_object_remove(struct iommufd_ctx *ictx,
-						    u32 id, bool extra_put)
+int iommufd_object_remove(struct iommufd_ctx *ictx,
+			  struct iommufd_object *to_destroy, u32 id,
+			  unsigned int flags)
 {
 	struct iommufd_object *obj;
 	XA_STATE(xas, &ictx->objects, id);
-
-	xa_lock(&ictx->objects);
-	obj = xas_load(&xas);
-	if (xa_is_zero(obj) || !obj) {
-		obj = ERR_PTR(-ENOENT);
-		goto out_xa;
-	}
+	bool zerod_shortterm = false;
+	int ret;
 
 	/*
-	 * If the caller is holding a ref on obj we put it here under the
-	 * spinlock.
+	 * The purpose of the shortterm_users is to ensure deterministic
+	 * destruction of objects used by external drivers and destroyed by this
+	 * function. Any temporary increment of the refcount must increment
+	 * shortterm_users, such as during ioctl execution.
 	 */
-	if (extra_put)
+	if (flags & REMOVE_WAIT_SHORTTERM) {
+		ret = iommufd_object_dec_wait_shortterm(ictx, to_destroy);
+		if (ret) {
+			/*
+			 * We have a bug. Put back the callers reference and
+			 * defer cleaning this object until close.
+			 */
+			refcount_dec(&to_destroy->users);
+			return ret;
+		}
+		zerod_shortterm = true;
+	}
+
+	xa_lock(&ictx->objects);
+	obj = xas_load(&xas);
+	if (to_destroy) {
+		/*
+		 * If the caller is holding a ref on obj we put it here under
+		 * the spinlock.
+		 */
 		refcount_dec(&obj->users);
 
+		if (WARN_ON(obj != to_destroy)) {
+			ret = -ENOENT;
+			goto err_xa;
+		}
+	} else if (xa_is_zero(obj) || !obj) {
+		ret = -ENOENT;
+		goto err_xa;
+	}
+
 	if (!refcount_dec_if_one(&obj->users)) {
-		obj = ERR_PTR(-EBUSY);
-		goto out_xa;
+		ret = -EBUSY;
+		goto err_xa;
 	}
 
 	xas_store(&xas, NULL);
 	if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj))
 		ictx->vfio_ioas = NULL;
-
-out_xa:
 	xa_unlock(&ictx->objects);
 
-	/* The returned object reference count is zero */
-	return obj;
-}
-
-/*
- * The caller holds a users refcount and wants to destroy the object. Returns
- * true if the object was destroyed. In all cases the caller no longer has a
- * reference on obj.
- */
-void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
-				   struct iommufd_object *obj, bool allow_fail)
-{
-	struct iommufd_object *ret;
-
 	/*
-	 * The purpose of the destroy_rwsem is to ensure deterministic
-	 * destruction of objects used by external drivers and destroyed by this
-	 * function. Any temporary increment of the refcount must hold the read
-	 * side of this, such as during ioctl execution.
-	 */
-	down_write(&obj->destroy_rwsem);
-	ret = iommufd_object_remove(ictx, obj->id, true);
-	up_write(&obj->destroy_rwsem);
-
-	if (allow_fail && IS_ERR(ret))
-		return;
-
-	/*
-	 * If there is a bug and we couldn't destroy the object then we did put
-	 * back the caller's refcount and will eventually try to free it again
-	 * during close.
+	 * Since users is zero any positive users_shortterm must be racing
+	 * iommufd_put_object(), or we have a bug.
 	 */
-	if (WARN_ON(IS_ERR(ret)))
-		return;
+	if (!zerod_shortterm) {
+		ret = iommufd_object_dec_wait_shortterm(ictx, obj);
+		if (WARN_ON(ret))
+			return ret;
+	}
 
 	iommufd_object_ops[obj->type].destroy(obj);
 	kfree(obj);
+	return 0;
+
+err_xa:
+	if (zerod_shortterm) {
+		/* Restore the xarray owned reference */
+		refcount_set(&obj->shortterm_users, 1);
+	}
+	xa_unlock(&ictx->objects);
+
+	/* The returned object reference count is zero */
+	return ret;
 }
 
 static int iommufd_destroy(struct iommufd_ucmd *ucmd)
 {
 	struct iommu_destroy *cmd = ucmd->cmd;
-	struct iommufd_object *obj;
 
-	obj = iommufd_object_remove(ucmd->ictx, cmd->id, false);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-	iommufd_object_ops[obj->type].destroy(obj);
-	kfree(obj);
-	return 0;
+	return iommufd_object_remove(ucmd->ictx, NULL, cmd->id, 0);
 }
 
 static int iommufd_fops_open(struct inode *inode, struct file *filp)
@@ -238,6 +251,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
 	xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
 	xa_init(&ictx->groups);
 	ictx->file = filp;
+	init_waitqueue_head(&ictx->destroy_wait);
 	filp->private_data = ictx;
 	return 0;
 }
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index 5d93434003d8..022ef8f55088 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -86,7 +86,7 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
 	if (IS_ERR(ioas))
 		return;
 	*iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 }
 
 struct mock_iommu_domain {
@@ -500,7 +500,7 @@ get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id,
 		return hwpt;
 	if (hwpt->domain->type != IOMMU_DOMAIN_UNMANAGED ||
 	    hwpt->domain->ops != mock_ops.default_domain_ops) {
-		iommufd_put_object(&hwpt->obj);
+		iommufd_put_object(ucmd->ictx, &hwpt->obj);
 		return ERR_PTR(-EINVAL);
 	}
 	*mock = container_of(hwpt->domain, struct mock_iommu_domain, domain);
@@ -518,7 +518,7 @@ get_md_pagetable_nested(struct iommufd_ucmd *ucmd, u32 mockpt_id,
 		return hwpt;
 	if (hwpt->domain->type != IOMMU_DOMAIN_NESTED ||
 	    hwpt->domain->ops != &domain_nested_ops) {
-		iommufd_put_object(&hwpt->obj);
+		iommufd_put_object(ucmd->ictx, &hwpt->obj);
 		return ERR_PTR(-EINVAL);
 	}
 	*mock_nested = container_of(hwpt->domain,
@@ -681,7 +681,7 @@ static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd,
 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 
 out_dev_obj:
-	iommufd_put_object(dev_obj);
+	iommufd_put_object(ucmd->ictx, dev_obj);
 	return rc;
 }
 
@@ -699,7 +699,7 @@ static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd,
 	down_write(&ioas->iopt.iova_rwsem);
 	rc = iopt_reserve_iova(&ioas->iopt, start, start + length - 1, NULL);
 	up_write(&ioas->iopt.iova_rwsem);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ucmd->ictx, &ioas->obj);
 	return rc;
 }
 
@@ -754,7 +754,7 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd,
 	rc = 0;
 
 out_put:
-	iommufd_put_object(&hwpt->obj);
+	iommufd_put_object(ucmd->ictx, &hwpt->obj);
 	return rc;
 }
 
@@ -1233,7 +1233,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id,
 out_free:
 	kvfree(tmp);
 out_put:
-	iommufd_put_object(&hwpt->obj);
+	iommufd_put_object(ucmd->ictx, &hwpt->obj);
 	return rc;
 }
 
diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c
index 538fbf76354d..a3ad5f0b6c59 100644
--- a/drivers/iommu/iommufd/vfio_compat.c
+++ b/drivers/iommu/iommufd/vfio_compat.c
@@ -41,7 +41,7 @@ int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
 	if (IS_ERR(ioas))
 		return PTR_ERR(ioas);
 	*out_ioas_id = ioas->obj.id;
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ictx, &ioas->obj);
 	return 0;
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO);
@@ -98,7 +98,7 @@ int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx)
 
 	if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) {
 		ret = 0;
-		iommufd_put_object(&ictx->vfio_ioas->obj);
+		iommufd_put_object(ictx, &ictx->vfio_ioas->obj);
 		goto out_abort;
 	}
 	ictx->vfio_ioas = ioas;
@@ -133,7 +133,7 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
 		if (IS_ERR(ioas))
 			return PTR_ERR(ioas);
 		cmd->ioas_id = ioas->obj.id;
-		iommufd_put_object(&ioas->obj);
+		iommufd_put_object(ucmd->ictx, &ioas->obj);
 		return iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 
 	case IOMMU_VFIO_IOAS_SET:
@@ -143,7 +143,7 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
 		xa_lock(&ucmd->ictx->objects);
 		ucmd->ictx->vfio_ioas = ioas;
 		xa_unlock(&ucmd->ictx->objects);
-		iommufd_put_object(&ioas->obj);
+		iommufd_put_object(ucmd->ictx, &ioas->obj);
 		return 0;
 
 	case IOMMU_VFIO_IOAS_CLEAR:
@@ -190,7 +190,7 @@ static int iommufd_vfio_map_dma(struct iommufd_ctx *ictx, unsigned int cmd,
 	iova = map.iova;
 	rc = iopt_map_user_pages(ictx, &ioas->iopt, &iova, u64_to_user_ptr(map.vaddr),
 				 map.size, iommu_prot, 0);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ictx, &ioas->obj);
 	return rc;
 }
 
@@ -249,7 +249,7 @@ static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd,
 		rc = -EFAULT;
 
 err_put:
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ictx, &ioas->obj);
 	return rc;
 }
 
@@ -272,7 +272,7 @@ static int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx)
 	}
 	mutex_unlock(&ioas->mutex);
 
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ictx, &ioas->obj);
 	return rc;
 }
 
@@ -349,7 +349,7 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type)
 	 */
 	if (type == VFIO_TYPE1_IOMMU)
 		rc = iopt_disable_large_pages(&ioas->iopt);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ictx, &ioas->obj);
 	return rc;
 }
 
@@ -511,7 +511,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
 
 out_put:
 	up_read(&ioas->iopt.iova_rwsem);
-	iommufd_put_object(&ioas->obj);
+	iommufd_put_object(ictx, &ioas->obj);
 	return rc;
 }
 
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 157b286e36bf..35ba090f3b5e 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -112,16 +112,20 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 					   const u32 *id)
 {
 	const struct iommu_ops *ops = NULL;
-	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+	struct iommu_fwspec *fwspec;
 	int err = NO_IOMMU;
 
 	if (!master_np)
 		return NULL;
 
+	/* Serialise to make dev->iommu stable under our potential fwspec */
+	mutex_lock(&iommu_probe_device_lock);
+	fwspec = dev_iommu_fwspec_get(dev);
 	if (fwspec) {
-		if (fwspec->ops)
+		if (fwspec->ops) {
+			mutex_unlock(&iommu_probe_device_lock);
 			return fwspec->ops;
-
+		}
 		/* In the deferred case, start again from scratch */
 		iommu_fwspec_free(dev);
 	}
@@ -155,6 +159,8 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 		fwspec = dev_iommu_fwspec_get(dev);
 		ops    = fwspec->ops;
 	}
+	mutex_unlock(&iommu_probe_device_lock);
+
 	/*
 	 * If we have reason to believe the IOMMU driver missed the initial
 	 * probe for dev, replay it to get things in order.
@@ -191,7 +197,7 @@ iommu_resv_region_get_type(struct device *dev,
 	if (start == phys->start && end == phys->end)
 		return IOMMU_RESV_DIRECT;
 
-	dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", &phys,
+	dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", phys,
 		 &start, &end);
 	return IOMMU_RESV_RESERVED;
 }
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index 974b84f6bd6a..ba1be15cfd8e 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -75,19 +75,6 @@ static ssize_t max_brightness_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(max_brightness);
 
-static ssize_t color_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	const char *color_text = "invalid";
-	struct led_classdev *led_cdev = dev_get_drvdata(dev);
-
-	if (led_cdev->color < LED_COLOR_ID_MAX)
-		color_text = led_colors[led_cdev->color];
-
-	return sysfs_emit(buf, "%s\n", color_text);
-}
-static DEVICE_ATTR_RO(color);
-
 #ifdef CONFIG_LEDS_TRIGGERS
 static BIN_ATTR(trigger, 0644, led_trigger_read, led_trigger_write, 0);
 static struct bin_attribute *led_trigger_bin_attrs[] = {
@@ -102,7 +89,6 @@ static const struct attribute_group led_trigger_group = {
 static struct attribute *led_class_attrs[] = {
 	&dev_attr_brightness.attr,
 	&dev_attr_max_brightness.attr,
-	&dev_attr_color.attr,
 	NULL,
 };
 
diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c
index e358e77e4b38..d76214fa9ad8 100644
--- a/drivers/leds/trigger/ledtrig-netdev.c
+++ b/drivers/leds/trigger/ledtrig-netdev.c
@@ -226,6 +226,11 @@ static int set_device_name(struct led_netdev_data *trigger_data,
 
 	cancel_delayed_work_sync(&trigger_data->work);
 
+	/*
+	 * Take RTNL lock before trigger_data lock to prevent potential
+	 * deadlock with netdev notifier registration.
+	 */
+	rtnl_lock();
 	mutex_lock(&trigger_data->lock);
 
 	if (trigger_data->net_dev) {
@@ -245,16 +250,14 @@ static int set_device_name(struct led_netdev_data *trigger_data,
 	trigger_data->carrier_link_up = false;
 	trigger_data->link_speed = SPEED_UNKNOWN;
 	trigger_data->duplex = DUPLEX_UNKNOWN;
-	if (trigger_data->net_dev != NULL) {
-		rtnl_lock();
+	if (trigger_data->net_dev)
 		get_device_state(trigger_data);
-		rtnl_unlock();
-	}
 
 	trigger_data->last_activity = 0;
 
 	set_baseline_state(trigger_data);
 	mutex_unlock(&trigger_data->lock);
+	rtnl_unlock();
 
 	return 0;
 }
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index de3019972b35..196cdacce38f 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -293,16 +293,16 @@ static void btree_complete_write(struct btree *b, struct btree_write *w)
 	w->journal	= NULL;
 }
 
-static void btree_node_write_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(btree_node_write_unlock)
 {
-	struct btree *b = container_of(cl, struct btree, io);
+	closure_type(b, struct btree, io);
 
 	up(&b->io_mutex);
 }
 
-static void __btree_node_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(__btree_node_write_done)
 {
-	struct btree *b = container_of(cl, struct btree, io);
+	closure_type(b, struct btree, io);
 	struct btree_write *w = btree_prev_write(b);
 
 	bch_bbio_free(b->bio, b->c);
@@ -315,12 +315,12 @@ static void __btree_node_write_done(struct closure *cl)
 	closure_return_with_destructor(cl, btree_node_write_unlock);
 }
 
-static void btree_node_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(btree_node_write_done)
 {
-	struct btree *b = container_of(cl, struct btree, io);
+	closure_type(b, struct btree, io);
 
 	bio_free_pages(b->bio);
-	__btree_node_write_done(cl);
+	__btree_node_write_done(&cl->work);
 }
 
 static void btree_node_write_endio(struct bio *bio)
@@ -1522,7 +1522,7 @@ out_nocoalesce:
 	bch_keylist_free(&keylist);
 
 	for (i = 0; i < nodes; i++)
-		if (!IS_ERR(new_nodes[i])) {
+		if (!IS_ERR_OR_NULL(new_nodes[i])) {
 			btree_node_free(new_nodes[i]);
 			rw_unlock(true, new_nodes[i]);
 		}
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index c182c21de2e8..7ff14bd2feb8 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -723,11 +723,11 @@ static void journal_write_endio(struct bio *bio)
 	closure_put(&w->c->journal.io);
 }
 
-static void journal_write(struct closure *cl);
+static CLOSURE_CALLBACK(journal_write);
 
-static void journal_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_done)
 {
-	struct journal *j = container_of(cl, struct journal, io);
+	closure_type(j, struct journal, io);
 	struct journal_write *w = (j->cur == j->w)
 		? &j->w[1]
 		: &j->w[0];
@@ -736,19 +736,19 @@ static void journal_write_done(struct closure *cl)
 	continue_at_nobarrier(cl, journal_write, bch_journal_wq);
 }
 
-static void journal_write_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_unlock)
 	__releases(&c->journal.lock)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, journal.io);
+	closure_type(c, struct cache_set, journal.io);
 
 	c->journal.io_in_flight = 0;
 	spin_unlock(&c->journal.lock);
 }
 
-static void journal_write_unlocked(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_unlocked)
 	__releases(c->journal.lock)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, journal.io);
+	closure_type(c, struct cache_set, journal.io);
 	struct cache *ca = c->cache;
 	struct journal_write *w = c->journal.cur;
 	struct bkey *k = &c->journal.key;
@@ -823,12 +823,12 @@ static void journal_write_unlocked(struct closure *cl)
 	continue_at(cl, journal_write_done, NULL);
 }
 
-static void journal_write(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, journal.io);
+	closure_type(c, struct cache_set, journal.io);
 
 	spin_lock(&c->journal.lock);
-	journal_write_unlocked(cl);
+	journal_write_unlocked(&cl->work);
 }
 
 static void journal_try_write(struct cache_set *c)
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 9f32901fdad1..ebd500bdf0b2 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -35,16 +35,16 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
 
 /* Moving GC - IO loop */
 
-static void moving_io_destructor(struct closure *cl)
+static CLOSURE_CALLBACK(moving_io_destructor)
 {
-	struct moving_io *io = container_of(cl, struct moving_io, cl);
+	closure_type(io, struct moving_io, cl);
 
 	kfree(io);
 }
 
-static void write_moving_finish(struct closure *cl)
+static CLOSURE_CALLBACK(write_moving_finish)
 {
-	struct moving_io *io = container_of(cl, struct moving_io, cl);
+	closure_type(io, struct moving_io, cl);
 	struct bio *bio = &io->bio.bio;
 
 	bio_free_pages(bio);
@@ -89,9 +89,9 @@ static void moving_init(struct moving_io *io)
 	bch_bio_map(bio, NULL);
 }
 
-static void write_moving(struct closure *cl)
+static CLOSURE_CALLBACK(write_moving)
 {
-	struct moving_io *io = container_of(cl, struct moving_io, cl);
+	closure_type(io, struct moving_io, cl);
 	struct data_insert_op *op = &io->op;
 
 	if (!op->status) {
@@ -113,9 +113,9 @@ static void write_moving(struct closure *cl)
 	continue_at(cl, write_moving_finish, op->wq);
 }
 
-static void read_moving_submit(struct closure *cl)
+static CLOSURE_CALLBACK(read_moving_submit)
 {
-	struct moving_io *io = container_of(cl, struct moving_io, cl);
+	closure_type(io, struct moving_io, cl);
 	struct bio *bio = &io->bio.bio;
 
 	bch_submit_bbio(bio, io->op.c, &io->w->key, 0);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index a9b1f3896249..83d112bd2b1c 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -25,7 +25,7 @@
 
 struct kmem_cache *bch_search_cache;
 
-static void bch_data_insert_start(struct closure *cl);
+static CLOSURE_CALLBACK(bch_data_insert_start);
 
 static unsigned int cache_mode(struct cached_dev *dc)
 {
@@ -55,9 +55,9 @@ static void bio_csum(struct bio *bio, struct bkey *k)
 
 /* Insert data into cache */
 
-static void bch_data_insert_keys(struct closure *cl)
+static CLOSURE_CALLBACK(bch_data_insert_keys)
 {
-	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
+	closure_type(op, struct data_insert_op, cl);
 	atomic_t *journal_ref = NULL;
 	struct bkey *replace_key = op->replace ? &op->replace_key : NULL;
 	int ret;
@@ -136,9 +136,9 @@ out:
 	continue_at(cl, bch_data_insert_keys, op->wq);
 }
 
-static void bch_data_insert_error(struct closure *cl)
+static CLOSURE_CALLBACK(bch_data_insert_error)
 {
-	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
+	closure_type(op, struct data_insert_op, cl);
 
 	/*
 	 * Our data write just errored, which means we've got a bunch of keys to
@@ -163,7 +163,7 @@ static void bch_data_insert_error(struct closure *cl)
 
 	op->insert_keys.top = dst;
 
-	bch_data_insert_keys(cl);
+	bch_data_insert_keys(&cl->work);
 }
 
 static void bch_data_insert_endio(struct bio *bio)
@@ -184,9 +184,9 @@ static void bch_data_insert_endio(struct bio *bio)
 	bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
 }
 
-static void bch_data_insert_start(struct closure *cl)
+static CLOSURE_CALLBACK(bch_data_insert_start)
 {
-	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
+	closure_type(op, struct data_insert_op, cl);
 	struct bio *bio = op->bio, *n;
 
 	if (op->bypass)
@@ -305,16 +305,16 @@ err:
  * If op->bypass is true, instead of inserting the data it invalidates the
  * region of the cache represented by op->bio and op->inode.
  */
-void bch_data_insert(struct closure *cl)
+CLOSURE_CALLBACK(bch_data_insert)
 {
-	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
+	closure_type(op, struct data_insert_op, cl);
 
 	trace_bcache_write(op->c, op->inode, op->bio,
 			   op->writeback, op->bypass);
 
 	bch_keylist_init(&op->insert_keys);
 	bio_get(op->bio);
-	bch_data_insert_start(cl);
+	bch_data_insert_start(&cl->work);
 }
 
 /*
@@ -575,9 +575,9 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
 	return n == bio ? MAP_DONE : MAP_CONTINUE;
 }
 
-static void cache_lookup(struct closure *cl)
+static CLOSURE_CALLBACK(cache_lookup)
 {
-	struct search *s = container_of(cl, struct search, iop.cl);
+	closure_type(s, struct search, iop.cl);
 	struct bio *bio = &s->bio.bio;
 	struct cached_dev *dc;
 	int ret;
@@ -698,9 +698,9 @@ static void do_bio_hook(struct search *s,
 	bio_cnt_set(bio, 3);
 }
 
-static void search_free(struct closure *cl)
+static CLOSURE_CALLBACK(search_free)
 {
-	struct search *s = container_of(cl, struct search, cl);
+	closure_type(s, struct search, cl);
 
 	atomic_dec(&s->iop.c->search_inflight);
 
@@ -749,20 +749,20 @@ static inline struct search *search_alloc(struct bio *bio,
 
 /* Cached devices */
 
-static void cached_dev_bio_complete(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_bio_complete)
 {
-	struct search *s = container_of(cl, struct search, cl);
+	closure_type(s, struct search, cl);
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
 	cached_dev_put(dc);
-	search_free(cl);
+	search_free(&cl->work);
 }
 
 /* Process reads */
 
-static void cached_dev_read_error_done(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_read_error_done)
 {
-	struct search *s = container_of(cl, struct search, cl);
+	closure_type(s, struct search, cl);
 
 	if (s->iop.replace_collision)
 		bch_mark_cache_miss_collision(s->iop.c, s->d);
@@ -770,12 +770,12 @@ static void cached_dev_read_error_done(struct closure *cl)
 	if (s->iop.bio)
 		bio_free_pages(s->iop.bio);
 
-	cached_dev_bio_complete(cl);
+	cached_dev_bio_complete(&cl->work);
 }
 
-static void cached_dev_read_error(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_read_error)
 {
-	struct search *s = container_of(cl, struct search, cl);
+	closure_type(s, struct search, cl);
 	struct bio *bio = &s->bio.bio;
 
 	/*
@@ -801,9 +801,9 @@ static void cached_dev_read_error(struct closure *cl)
 	continue_at(cl, cached_dev_read_error_done, NULL);
 }
 
-static void cached_dev_cache_miss_done(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_cache_miss_done)
 {
-	struct search *s = container_of(cl, struct search, cl);
+	closure_type(s, struct search, cl);
 	struct bcache_device *d = s->d;
 
 	if (s->iop.replace_collision)
@@ -812,13 +812,13 @@ static void cached_dev_cache_miss_done(struct closure *cl)
 	if (s->iop.bio)
 		bio_free_pages(s->iop.bio);
 
-	cached_dev_bio_complete(cl);
+	cached_dev_bio_complete(&cl->work);
 	closure_put(&d->cl);
 }
 
-static void cached_dev_read_done(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_read_done)
 {
-	struct search *s = container_of(cl, struct search, cl);
+	closure_type(s, struct search, cl);
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
 	/*
@@ -858,9 +858,9 @@ static void cached_dev_read_done(struct closure *cl)
 	continue_at(cl, cached_dev_cache_miss_done, NULL);
 }
 
-static void cached_dev_read_done_bh(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_read_done_bh)
 {
-	struct search *s = container_of(cl, struct search, cl);
+	closure_type(s, struct search, cl);
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
 	bch_mark_cache_accounting(s->iop.c, s->d,
@@ -955,13 +955,13 @@ static void cached_dev_read(struct cached_dev *dc, struct search *s)
 
 /* Process writes */
 
-static void cached_dev_write_complete(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_write_complete)
 {
-	struct search *s = container_of(cl, struct search, cl);
+	closure_type(s, struct search, cl);
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
 	up_read_non_owner(&dc->writeback_lock);
-	cached_dev_bio_complete(cl);
+	cached_dev_bio_complete(&cl->work);
 }
 
 static void cached_dev_write(struct cached_dev *dc, struct search *s)
@@ -1048,9 +1048,9 @@ insert_data:
 	continue_at(cl, cached_dev_write_complete, NULL);
 }
 
-static void cached_dev_nodata(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_nodata)
 {
-	struct search *s = container_of(cl, struct search, cl);
+	closure_type(s, struct search, cl);
 	struct bio *bio = &s->bio.bio;
 
 	if (s->iop.flush_journal)
@@ -1265,9 +1265,9 @@ static int flash_dev_cache_miss(struct btree *b, struct search *s,
 	return MAP_CONTINUE;
 }
 
-static void flash_dev_nodata(struct closure *cl)
+static CLOSURE_CALLBACK(flash_dev_nodata)
 {
-	struct search *s = container_of(cl, struct search, cl);
+	closure_type(s, struct search, cl);
 
 	if (s->iop.flush_journal)
 		bch_journal_meta(s->iop.c, cl);
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 38ab4856eaab..46bbef00aebb 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -34,7 +34,7 @@ struct data_insert_op {
 };
 
 unsigned int bch_get_congested(const struct cache_set *c);
-void bch_data_insert(struct closure *cl);
+CLOSURE_CALLBACK(bch_data_insert);
 
 void bch_cached_dev_request_init(struct cached_dev *dc);
 void cached_dev_submit_bio(struct bio *bio);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index bfe1685dbae5..1402096b8076 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -327,9 +327,9 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
 	submit_bio(bio);
 }
 
-static void bch_write_bdev_super_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(bch_write_bdev_super_unlock)
 {
-	struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write);
+	closure_type(dc, struct cached_dev, sb_write);
 
 	up(&dc->sb_write_mutex);
 }
@@ -363,9 +363,9 @@ static void write_super_endio(struct bio *bio)
 	closure_put(&ca->set->sb_write);
 }
 
-static void bcache_write_super_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(bcache_write_super_unlock)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, sb_write);
+	closure_type(c, struct cache_set, sb_write);
 
 	up(&c->sb_write_mutex);
 }
@@ -407,9 +407,9 @@ static void uuid_endio(struct bio *bio)
 	closure_put(cl);
 }
 
-static void uuid_io_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(uuid_io_unlock)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
+	closure_type(c, struct cache_set, uuid_write);
 
 	up(&c->uuid_write_mutex);
 }
@@ -1344,9 +1344,9 @@ void bch_cached_dev_release(struct kobject *kobj)
 	module_put(THIS_MODULE);
 }
 
-static void cached_dev_free(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_free)
 {
-	struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
+	closure_type(dc, struct cached_dev, disk.cl);
 
 	if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
 		cancel_writeback_rate_update_dwork(dc);
@@ -1378,9 +1378,9 @@ static void cached_dev_free(struct closure *cl)
 	kobject_put(&dc->disk.kobj);
 }
 
-static void cached_dev_flush(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_flush)
 {
-	struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
+	closure_type(dc, struct cached_dev, disk.cl);
 	struct bcache_device *d = &dc->disk;
 
 	mutex_lock(&bch_register_lock);
@@ -1499,9 +1499,9 @@ void bch_flash_dev_release(struct kobject *kobj)
 	kfree(d);
 }
 
-static void flash_dev_free(struct closure *cl)
+static CLOSURE_CALLBACK(flash_dev_free)
 {
-	struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+	closure_type(d, struct bcache_device, cl);
 
 	mutex_lock(&bch_register_lock);
 	atomic_long_sub(bcache_dev_sectors_dirty(d),
@@ -1512,9 +1512,9 @@ static void flash_dev_free(struct closure *cl)
 	kobject_put(&d->kobj);
 }
 
-static void flash_dev_flush(struct closure *cl)
+static CLOSURE_CALLBACK(flash_dev_flush)
 {
-	struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+	closure_type(d, struct bcache_device, cl);
 
 	mutex_lock(&bch_register_lock);
 	bcache_device_unlink(d);
@@ -1670,9 +1670,9 @@ void bch_cache_set_release(struct kobject *kobj)
 	module_put(THIS_MODULE);
 }
 
-static void cache_set_free(struct closure *cl)
+static CLOSURE_CALLBACK(cache_set_free)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, cl);
+	closure_type(c, struct cache_set, cl);
 	struct cache *ca;
 
 	debugfs_remove(c->debug);
@@ -1711,9 +1711,9 @@ static void cache_set_free(struct closure *cl)
 	kobject_put(&c->kobj);
 }
 
-static void cache_set_flush(struct closure *cl)
+static CLOSURE_CALLBACK(cache_set_flush)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, caching);
+	closure_type(c, struct cache_set, caching);
 	struct cache *ca = c->cache;
 	struct btree *b;
 
@@ -1808,9 +1808,9 @@ static void conditional_stop_bcache_device(struct cache_set *c,
 	}
 }
 
-static void __cache_set_unregister(struct closure *cl)
+static CLOSURE_CALLBACK(__cache_set_unregister)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, caching);
+	closure_type(c, struct cache_set, caching);
 	struct cached_dev *dc;
 	struct bcache_device *d;
 	size_t i;
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 3accfdaee6b1..8827a6f130ad 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -341,16 +341,16 @@ static void dirty_init(struct keybuf_key *w)
 	bch_bio_map(bio, NULL);
 }
 
-static void dirty_io_destructor(struct closure *cl)
+static CLOSURE_CALLBACK(dirty_io_destructor)
 {
-	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+	closure_type(io, struct dirty_io, cl);
 
 	kfree(io);
 }
 
-static void write_dirty_finish(struct closure *cl)
+static CLOSURE_CALLBACK(write_dirty_finish)
 {
-	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+	closure_type(io, struct dirty_io, cl);
 	struct keybuf_key *w = io->bio.bi_private;
 	struct cached_dev *dc = io->dc;
 
@@ -400,9 +400,9 @@ static void dirty_endio(struct bio *bio)
 	closure_put(&io->cl);
 }
 
-static void write_dirty(struct closure *cl)
+static CLOSURE_CALLBACK(write_dirty)
 {
-	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+	closure_type(io, struct dirty_io, cl);
 	struct keybuf_key *w = io->bio.bi_private;
 	struct cached_dev *dc = io->dc;
 
@@ -462,9 +462,9 @@ static void read_dirty_endio(struct bio *bio)
 	dirty_endio(bio);
 }
 
-static void read_dirty_submit(struct closure *cl)
+static CLOSURE_CALLBACK(read_dirty_submit)
 {
-	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+	closure_type(io, struct dirty_io, cl);
 
 	closure_bio_submit(io->dc->disk.c, &io->bio, cl);
 
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 120153e44ae0..f57fb821528d 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -434,7 +434,7 @@ static struct bio *clone_bio(struct dm_target *ti, struct flakey_c *fc, struct b
 
 	remaining_size = size;
 
-	order = MAX_ORDER - 1;
+	order = MAX_ORDER;
 	while (remaining_size) {
 		struct page *pages;
 		unsigned size_to_add, to_copy;
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 2099c755119e..b475200d8586 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -24,7 +24,8 @@ bool verity_fec_is_enabled(struct dm_verity *v)
  */
 static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io)
 {
-	return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io);
+	return (struct dm_verity_fec_io *)
+		((char *)io + io->v->ti->per_io_data_size - sizeof(struct dm_verity_fec_io));
 }
 
 /*
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index e115fcfe723c..14e58ae70521 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -642,7 +642,6 @@ static void verity_work(struct work_struct *w)
 
 	io->in_tasklet = false;
 
-	verity_fec_init_io(io);
 	verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
 }
 
@@ -668,7 +667,9 @@ static void verity_end_io(struct bio *bio)
 	struct dm_verity_io *io = bio->bi_private;
 
 	if (bio->bi_status &&
-	    (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) {
+	    (!verity_fec_is_enabled(io->v) ||
+	     verity_is_system_shutting_down() ||
+	     (bio->bi_opf & REQ_RAHEAD))) {
 		verity_finish_io(io, bio->bi_status);
 		return;
 	}
@@ -792,6 +793,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
 	bio->bi_private = io;
 	io->iter = bio->bi_iter;
 
+	verity_fec_init_io(io);
+
 	verity_submit_prefetch(v, io);
 
 	submit_bio_noacct(bio);
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index f96f4e281ee4..f9d522c870e6 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -115,12 +115,6 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v,
 	return (u8 *)(io + 1) + v->ahash_reqsize + v->digest_size;
 }
 
-static inline u8 *verity_io_digest_end(struct dm_verity *v,
-				       struct dm_verity_io *io)
-{
-	return verity_io_want_digest(v, io) + v->digest_size;
-}
-
 extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
 			       struct bvec_iter *iter,
 			       int (*process)(struct dm_verity *v,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c94373d64f2c..b066abbffd10 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -490,7 +490,7 @@ int mddev_suspend(struct mddev *mddev, bool interruptible)
 }
 EXPORT_SYMBOL_GPL(mddev_suspend);
 
-void mddev_resume(struct mddev *mddev)
+static void __mddev_resume(struct mddev *mddev, bool recovery_needed)
 {
 	lockdep_assert_not_held(&mddev->reconfig_mutex);
 
@@ -507,12 +507,18 @@ void mddev_resume(struct mddev *mddev)
 	percpu_ref_resurrect(&mddev->active_io);
 	wake_up(&mddev->sb_wait);
 
-	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	if (recovery_needed)
+		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
 	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
 
 	mutex_unlock(&mddev->suspend_mutex);
 }
+
+void mddev_resume(struct mddev *mddev)
+{
+	return __mddev_resume(mddev, true);
+}
 EXPORT_SYMBOL_GPL(mddev_resume);
 
 /*
@@ -4840,25 +4846,29 @@ action_show(struct mddev *mddev, char *page)
 	return sprintf(page, "%s\n", type);
 }
 
-static void stop_sync_thread(struct mddev *mddev)
+/**
+ * stop_sync_thread() - wait for sync_thread to stop if it's running.
+ * @mddev:	the array.
+ * @locked:	if set, reconfig_mutex will still be held after this function
+ *		return; if not set, reconfig_mutex will be released after this
+ *		function return.
+ * @check_seq:	if set, only wait for curent running sync_thread to stop, noted
+ *		that new sync_thread can still start.
+ */
+static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
 {
-	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-		return;
+	int sync_seq;
 
-	if (mddev_lock(mddev))
-		return;
+	if (check_seq)
+		sync_seq = atomic_read(&mddev->sync_seq);
 
-	/*
-	 * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
-	 * held.
-	 */
 	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
-		mddev_unlock(mddev);
+		if (!locked)
+			mddev_unlock(mddev);
 		return;
 	}
 
-	if (work_pending(&mddev->del_work))
-		flush_workqueue(md_misc_wq);
+	mddev_unlock(mddev);
 
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	/*
@@ -4866,21 +4876,28 @@ static void stop_sync_thread(struct mddev *mddev)
 	 * never happen
 	 */
 	md_wakeup_thread_directly(mddev->sync_thread);
+	if (work_pending(&mddev->sync_work))
+		flush_work(&mddev->sync_work);
 
-	mddev_unlock(mddev);
+	wait_event(resync_wait,
+		   !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+		   (check_seq && sync_seq != atomic_read(&mddev->sync_seq)));
+
+	if (locked)
+		mddev_lock_nointr(mddev);
 }
 
 static void idle_sync_thread(struct mddev *mddev)
 {
-	int sync_seq = atomic_read(&mddev->sync_seq);
-
 	mutex_lock(&mddev->sync_mutex);
 	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-	stop_sync_thread(mddev);
 
-	wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
-			!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+	if (mddev_lock(mddev)) {
+		mutex_unlock(&mddev->sync_mutex);
+		return;
+	}
 
+	stop_sync_thread(mddev, false, true);
 	mutex_unlock(&mddev->sync_mutex);
 }
 
@@ -4888,11 +4905,13 @@ static void frozen_sync_thread(struct mddev *mddev)
 {
 	mutex_lock(&mddev->sync_mutex);
 	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-	stop_sync_thread(mddev);
 
-	wait_event(resync_wait, mddev->sync_thread == NULL &&
-			!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+	if (mddev_lock(mddev)) {
+		mutex_unlock(&mddev->sync_mutex);
+		return;
+	}
 
+	stop_sync_thread(mddev, false, false);
 	mutex_unlock(&mddev->sync_mutex);
 }
 
@@ -6264,14 +6283,7 @@ static void md_clean(struct mddev *mddev)
 
 static void __md_stop_writes(struct mddev *mddev)
 {
-	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-	if (work_pending(&mddev->del_work))
-		flush_workqueue(md_misc_wq);
-	if (mddev->sync_thread) {
-		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-		md_reap_sync_thread(mddev);
-	}
-
+	stop_sync_thread(mddev, true, false);
 	del_timer_sync(&mddev->safemode_timer);
 
 	if (mddev->pers && mddev->pers->quiesce) {
@@ -6355,25 +6367,16 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 	int err = 0;
 	int did_freeze = 0;
 
+	if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
+		return -EBUSY;
+
 	if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
 		did_freeze = 1;
 		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
 	}
-	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 
-	/*
-	 * Thread might be blocked waiting for metadata update which will now
-	 * never happen
-	 */
-	md_wakeup_thread_directly(mddev->sync_thread);
-
-	if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
-		return -EBUSY;
-	mddev_unlock(mddev);
-	wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
-					  &mddev->recovery));
+	stop_sync_thread(mddev, false, false);
 	wait_event(mddev->sb_wait,
 		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
 	mddev_lock_nointr(mddev);
@@ -6383,29 +6386,30 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 	    mddev->sync_thread ||
 	    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
 		pr_warn("md: %s still in use.\n",mdname(mddev));
-		if (did_freeze) {
-			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-			md_wakeup_thread(mddev->thread);
-		}
 		err = -EBUSY;
 		goto out;
 	}
+
 	if (mddev->pers) {
 		__md_stop_writes(mddev);
 
-		err  = -ENXIO;
-		if (mddev->ro == MD_RDONLY)
+		if (mddev->ro == MD_RDONLY) {
+			err  = -ENXIO;
 			goto out;
+		}
+
 		mddev->ro = MD_RDONLY;
 		set_disk_ro(mddev->gendisk, 1);
+	}
+
+out:
+	if ((mddev->pers && !err) || did_freeze) {
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
-		err = 0;
 	}
-out:
+
 	mutex_unlock(&mddev->open_mutex);
 	return err;
 }
@@ -6426,20 +6430,8 @@ static int do_md_stop(struct mddev *mddev, int mode,
 		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
 	}
-	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-
-	/*
-	 * Thread might be blocked waiting for metadata update which will now
-	 * never happen
-	 */
-	md_wakeup_thread_directly(mddev->sync_thread);
 
-	mddev_unlock(mddev);
-	wait_event(resync_wait, (mddev->sync_thread == NULL &&
-				 !test_bit(MD_RECOVERY_RUNNING,
-					   &mddev->recovery)));
-	mddev_lock_nointr(mddev);
+	stop_sync_thread(mddev, true, false);
 
 	mutex_lock(&mddev->open_mutex);
 	if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
@@ -9403,7 +9395,15 @@ static void md_start_sync(struct work_struct *ws)
 		goto not_running;
 	}
 
-	suspend ? mddev_unlock_and_resume(mddev) : mddev_unlock(mddev);
+	mddev_unlock(mddev);
+	/*
+	 * md_start_sync was triggered by MD_RECOVERY_NEEDED, so we should
+	 * not set it again. Otherwise, we may cause issue like this one:
+	 *     https://bugzilla.kernel.org/show_bug.cgi?id=218200
+	 * Therefore, use __mddev_resume(mddev, false).
+	 */
+	if (suspend)
+		__mddev_resume(mddev, false);
 	md_wakeup_thread(mddev->sync_thread);
 	sysfs_notify_dirent_safe(mddev->sysfs_action);
 	md_new_event();
@@ -9415,7 +9415,15 @@ not_running:
 	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
 	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
 	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-	suspend ? mddev_unlock_and_resume(mddev) : mddev_unlock(mddev);
+	mddev_unlock(mddev);
+	/*
+	 * md_start_sync was triggered by MD_RECOVERY_NEEDED, so we should
+	 * not set it again. Otherwise, we may cause issue like this one:
+	 *     https://bugzilla.kernel.org/show_bug.cgi?id=218200
+	 * Therefore, use __mddev_resume(mddev, false).
+	 */
+	if (suspend)
+		__mddev_resume(mddev, false);
 
 	wake_up(&resync_wait);
 	if (test_and_clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index dc031d42f53b..26e1e8a5e941 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5892,11 +5892,11 @@ static bool stripe_ahead_of_reshape(struct mddev *mddev, struct r5conf *conf,
 	int dd_idx;
 
 	for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
-		if (dd_idx == sh->pd_idx)
+		if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
 			continue;
 
 		min_sector = min(min_sector, sh->dev[dd_idx].sector);
-		max_sector = min(max_sector, sh->dev[dd_idx].sector);
+		max_sector = max(max_sector, sh->dev[dd_idx].sector);
 	}
 
 	spin_lock_irq(&conf->device_lock);
diff --git a/drivers/media/pci/mgb4/Kconfig b/drivers/media/pci/mgb4/Kconfig
index 13fad15a434c..f2a05a1c8ffa 100644
--- a/drivers/media/pci/mgb4/Kconfig
+++ b/drivers/media/pci/mgb4/Kconfig
@@ -2,6 +2,7 @@
 config VIDEO_MGB4
 	tristate "Digiteq Automotive MGB4 support"
 	depends on VIDEO_DEV && PCI && I2C && DMADEVICES && SPI && MTD && IIO
+	depends on COMMON_CLK
 	select VIDEOBUF2_DMA_SG
 	select IIO_BUFFER
 	select IIO_TRIGGERED_BUFFER
diff --git a/drivers/media/pci/mgb4/mgb4_core.c b/drivers/media/pci/mgb4/mgb4_core.c
index 3efb33fbf40c..5bfb8a06202e 100644
--- a/drivers/media/pci/mgb4/mgb4_core.c
+++ b/drivers/media/pci/mgb4/mgb4_core.c
@@ -42,6 +42,10 @@
 
 #define MGB4_USER_IRQS 16
 
+#define DIGITEQ_VID 0x1ed8
+#define T100_DID    0x0101
+#define T200_DID    0x0201
+
 ATTRIBUTE_GROUPS(mgb4_pci);
 
 static int flashid;
@@ -151,7 +155,7 @@ static struct spi_master *get_spi_adap(struct platform_device *pdev)
 	return dev ? container_of(dev, struct spi_master, dev) : NULL;
 }
 
-static int init_spi(struct mgb4_dev *mgbdev)
+static int init_spi(struct mgb4_dev *mgbdev, u32 devid)
 {
 	struct resource spi_resources[] = {
 		{
@@ -213,8 +217,13 @@ static int init_spi(struct mgb4_dev *mgbdev)
 	snprintf(mgbdev->fw_part_name, sizeof(mgbdev->fw_part_name),
 		 "mgb4-fw.%d", flashid);
 	mgbdev->partitions[0].name = mgbdev->fw_part_name;
-	mgbdev->partitions[0].size = 0x400000;
-	mgbdev->partitions[0].offset = 0x400000;
+	if (devid == T200_DID) {
+		mgbdev->partitions[0].size = 0x950000;
+		mgbdev->partitions[0].offset = 0x1000000;
+	} else {
+		mgbdev->partitions[0].size = 0x400000;
+		mgbdev->partitions[0].offset = 0x400000;
+	}
 	mgbdev->partitions[0].mask_flags = 0;
 
 	snprintf(mgbdev->data_part_name, sizeof(mgbdev->data_part_name),
@@ -551,7 +560,7 @@ static int mgb4_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_video_regs;
 
 	/* SPI FLASH */
-	rv = init_spi(mgbdev);
+	rv = init_spi(mgbdev, id->device);
 	if (rv < 0)
 		goto err_cmt_regs;
 
@@ -666,7 +675,8 @@ static void mgb4_remove(struct pci_dev *pdev)
 }
 
 static const struct pci_device_id mgb4_pci_ids[] = {
-	{ PCI_DEVICE(0x1ed8, 0x0101), },
+	{ PCI_DEVICE(DIGITEQ_VID, T100_DID), },
+	{ PCI_DEVICE(DIGITEQ_VID, T200_DID), },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, mgb4_pci_ids);
diff --git a/drivers/media/platform/renesas/vsp1/vsp1_pipe.c b/drivers/media/platform/renesas/vsp1/vsp1_pipe.c
index f8093ba9539e..68d05243c3ee 100644
--- a/drivers/media/platform/renesas/vsp1/vsp1_pipe.c
+++ b/drivers/media/platform/renesas/vsp1/vsp1_pipe.c
@@ -373,7 +373,7 @@ int vsp1_pipeline_stop(struct vsp1_pipeline *pipe)
 			   (7 << VI6_DPR_SMPPT_TGW_SHIFT) |
 			   (VI6_DPR_NODE_UNUSED << VI6_DPR_SMPPT_PT_SHIFT));
 
-	v4l2_subdev_call(&pipe->output->entity.subdev, video, s_stream, 0);
+	vsp1_wpf_stop(pipe->output);
 
 	return ret;
 }
diff --git a/drivers/media/platform/renesas/vsp1/vsp1_rpf.c b/drivers/media/platform/renesas/vsp1/vsp1_rpf.c
index 3b17f5fa4067..ea12c3f12c92 100644
--- a/drivers/media/platform/renesas/vsp1/vsp1_rpf.c
+++ b/drivers/media/platform/renesas/vsp1/vsp1_rpf.c
@@ -44,14 +44,6 @@ static inline void vsp1_rpf_write(struct vsp1_rwpf *rpf,
 }
 
 /* -----------------------------------------------------------------------------
- * V4L2 Subdevice Operations
- */
-
-static const struct v4l2_subdev_ops rpf_ops = {
-	.pad    = &vsp1_rwpf_pad_ops,
-};
-
-/* -----------------------------------------------------------------------------
  * VSP1 Entity Operations
  */
 
@@ -411,7 +403,7 @@ struct vsp1_rwpf *vsp1_rpf_create(struct vsp1_device *vsp1, unsigned int index)
 	rpf->entity.index = index;
 
 	sprintf(name, "rpf.%u", index);
-	ret = vsp1_entity_init(vsp1, &rpf->entity, name, 2, &rpf_ops,
+	ret = vsp1_entity_init(vsp1, &rpf->entity, name, 2, &vsp1_rwpf_subdev_ops,
 			       MEDIA_ENT_F_PROC_VIDEO_PIXEL_FORMATTER);
 	if (ret < 0)
 		return ERR_PTR(ret);
diff --git a/drivers/media/platform/renesas/vsp1/vsp1_rwpf.c b/drivers/media/platform/renesas/vsp1/vsp1_rwpf.c
index 22a82d218152..e0f87c8103ca 100644
--- a/drivers/media/platform/renesas/vsp1/vsp1_rwpf.c
+++ b/drivers/media/platform/renesas/vsp1/vsp1_rwpf.c
@@ -24,7 +24,7 @@ struct v4l2_rect *vsp1_rwpf_get_crop(struct vsp1_rwpf *rwpf,
 }
 
 /* -----------------------------------------------------------------------------
- * V4L2 Subdevice Pad Operations
+ * V4L2 Subdevice Operations
  */
 
 static int vsp1_rwpf_enum_mbus_code(struct v4l2_subdev *subdev,
@@ -243,7 +243,7 @@ done:
 	return ret;
 }
 
-const struct v4l2_subdev_pad_ops vsp1_rwpf_pad_ops = {
+static const struct v4l2_subdev_pad_ops vsp1_rwpf_pad_ops = {
 	.init_cfg = vsp1_entity_init_cfg,
 	.enum_mbus_code = vsp1_rwpf_enum_mbus_code,
 	.enum_frame_size = vsp1_rwpf_enum_frame_size,
@@ -253,6 +253,10 @@ const struct v4l2_subdev_pad_ops vsp1_rwpf_pad_ops = {
 	.set_selection = vsp1_rwpf_set_selection,
 };
 
+const struct v4l2_subdev_ops vsp1_rwpf_subdev_ops = {
+	.pad    = &vsp1_rwpf_pad_ops,
+};
+
 /* -----------------------------------------------------------------------------
  * Controls
  */
diff --git a/drivers/media/platform/renesas/vsp1/vsp1_rwpf.h b/drivers/media/platform/renesas/vsp1/vsp1_rwpf.h
index eac5c04c2239..e0d212c70b2f 100644
--- a/drivers/media/platform/renesas/vsp1/vsp1_rwpf.h
+++ b/drivers/media/platform/renesas/vsp1/vsp1_rwpf.h
@@ -79,9 +79,11 @@ static inline struct vsp1_rwpf *entity_to_rwpf(struct vsp1_entity *entity)
 struct vsp1_rwpf *vsp1_rpf_create(struct vsp1_device *vsp1, unsigned int index);
 struct vsp1_rwpf *vsp1_wpf_create(struct vsp1_device *vsp1, unsigned int index);
 
+void vsp1_wpf_stop(struct vsp1_rwpf *wpf);
+
 int vsp1_rwpf_init_ctrls(struct vsp1_rwpf *rwpf, unsigned int ncontrols);
 
-extern const struct v4l2_subdev_pad_ops vsp1_rwpf_pad_ops;
+extern const struct v4l2_subdev_ops vsp1_rwpf_subdev_ops;
 
 struct v4l2_rect *vsp1_rwpf_get_crop(struct vsp1_rwpf *rwpf,
 				     struct v4l2_subdev_state *sd_state);
diff --git a/drivers/media/platform/renesas/vsp1/vsp1_wpf.c b/drivers/media/platform/renesas/vsp1/vsp1_wpf.c
index d0074ca00920..cab4445eca69 100644
--- a/drivers/media/platform/renesas/vsp1/vsp1_wpf.c
+++ b/drivers/media/platform/renesas/vsp1/vsp1_wpf.c
@@ -186,17 +186,13 @@ static int wpf_init_controls(struct vsp1_rwpf *wpf)
 }
 
 /* -----------------------------------------------------------------------------
- * V4L2 Subdevice Core Operations
+ * VSP1 Entity Operations
  */
 
-static int wpf_s_stream(struct v4l2_subdev *subdev, int enable)
+void vsp1_wpf_stop(struct vsp1_rwpf *wpf)
 {
-	struct vsp1_rwpf *wpf = to_rwpf(subdev);
 	struct vsp1_device *vsp1 = wpf->entity.vsp1;
 
-	if (enable)
-		return 0;
-
 	/*
 	 * Write to registers directly when stopping the stream as there will be
 	 * no pipeline run to apply the display list.
@@ -204,27 +200,8 @@ static int wpf_s_stream(struct v4l2_subdev *subdev, int enable)
 	vsp1_write(vsp1, VI6_WPF_IRQ_ENB(wpf->entity.index), 0);
 	vsp1_write(vsp1, wpf->entity.index * VI6_WPF_OFFSET +
 		   VI6_WPF_SRCRPF, 0);
-
-	return 0;
 }
 
-/* -----------------------------------------------------------------------------
- * V4L2 Subdevice Operations
- */
-
-static const struct v4l2_subdev_video_ops wpf_video_ops = {
-	.s_stream = wpf_s_stream,
-};
-
-static const struct v4l2_subdev_ops wpf_ops = {
-	.video	= &wpf_video_ops,
-	.pad    = &vsp1_rwpf_pad_ops,
-};
-
-/* -----------------------------------------------------------------------------
- * VSP1 Entity Operations
- */
-
 static void vsp1_wpf_destroy(struct vsp1_entity *entity)
 {
 	struct vsp1_rwpf *wpf = entity_to_rwpf(entity);
@@ -583,7 +560,7 @@ struct vsp1_rwpf *vsp1_wpf_create(struct vsp1_device *vsp1, unsigned int index)
 	wpf->entity.index = index;
 
 	sprintf(name, "wpf.%u", index);
-	ret = vsp1_entity_init(vsp1, &wpf->entity, name, 2, &wpf_ops,
+	ret = vsp1_entity_init(vsp1, &wpf->entity, name, 2, &vsp1_rwpf_subdev_ops,
 			       MEDIA_ENT_F_PROC_VIDEO_PIXEL_FORMATTER);
 	if (ret < 0)
 		return ERR_PTR(ret);
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index 9c8fc87938a7..9d090fa07516 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -2011,7 +2011,7 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, unsigned long time
 
 	mei_hdr = mei_msg_hdr_init(cb);
 	if (IS_ERR(mei_hdr)) {
-		rets = -PTR_ERR(mei_hdr);
+		rets = PTR_ERR(mei_hdr);
 		mei_hdr = NULL;
 		goto err;
 	}
@@ -2032,7 +2032,7 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, unsigned long time
 
 	hbuf_slots = mei_hbuf_empty_slots(dev);
 	if (hbuf_slots < 0) {
-		rets = -EOVERFLOW;
+		buf_len = -EOVERFLOW;
 		goto out;
 	}
 
diff --git a/drivers/misc/mei/pxp/mei_pxp.c b/drivers/misc/mei/pxp/mei_pxp.c
index f77d78fa5054..787c6a27a4be 100644
--- a/drivers/misc/mei/pxp/mei_pxp.c
+++ b/drivers/misc/mei/pxp/mei_pxp.c
@@ -84,9 +84,10 @@ mei_pxp_send_message(struct device *dev, const void *message, size_t size, unsig
 				byte = ret;
 			break;
 		}
+		return byte;
 	}
 
-	return byte;
+	return 0;
 }
 
 /**
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 152dfe593c43..f9a5cffa64b1 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1482,6 +1482,8 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req)
 			blk_mq_requeue_request(req, true);
 		else
 			__blk_mq_end_request(req, BLK_STS_OK);
+	} else if (mq->in_recovery) {
+		blk_mq_requeue_request(req, true);
 	} else {
 		blk_mq_end_request(req, BLK_STS_OK);
 	}
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 3d3e0ca52614..a8c17b4cd737 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -551,7 +551,9 @@ int mmc_cqe_recovery(struct mmc_host *host)
 	cmd.flags        = MMC_RSP_R1B | MMC_CMD_AC;
 	cmd.flags       &= ~MMC_RSP_CRC; /* Ignore CRC */
 	cmd.busy_timeout = MMC_CQE_RECOVERY_TIMEOUT;
-	mmc_wait_for_cmd(host, &cmd, 0);
+	mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES);
+
+	mmc_poll_for_busy(host->card, MMC_CQE_RECOVERY_TIMEOUT, true, MMC_BUSY_IO);
 
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.opcode       = MMC_CMDQ_TASK_MGMT;
@@ -559,10 +561,13 @@ int mmc_cqe_recovery(struct mmc_host *host)
 	cmd.flags        = MMC_RSP_R1B | MMC_CMD_AC;
 	cmd.flags       &= ~MMC_RSP_CRC; /* Ignore CRC */
 	cmd.busy_timeout = MMC_CQE_RECOVERY_TIMEOUT;
-	err = mmc_wait_for_cmd(host, &cmd, 0);
+	err = mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES);
 
 	host->cqe_ops->cqe_recovery_finish(host);
 
+	if (err)
+		err = mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES);
+
 	mmc_retune_release(host);
 
 	return err;
diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c
index b3d7d6d8d654..41e94cd14109 100644
--- a/drivers/mmc/host/cqhci-core.c
+++ b/drivers/mmc/host/cqhci-core.c
@@ -942,8 +942,8 @@ static bool cqhci_clear_all_tasks(struct mmc_host *mmc, unsigned int timeout)
 	ret = cqhci_tasks_cleared(cq_host);
 
 	if (!ret)
-		pr_debug("%s: cqhci: Failed to clear tasks\n",
-			 mmc_hostname(mmc));
+		pr_warn("%s: cqhci: Failed to clear tasks\n",
+			mmc_hostname(mmc));
 
 	return ret;
 }
@@ -976,7 +976,7 @@ static bool cqhci_halt(struct mmc_host *mmc, unsigned int timeout)
 	ret = cqhci_halted(cq_host);
 
 	if (!ret)
-		pr_debug("%s: cqhci: Failed to halt\n", mmc_hostname(mmc));
+		pr_warn("%s: cqhci: Failed to halt\n", mmc_hostname(mmc));
 
 	return ret;
 }
@@ -984,10 +984,10 @@ static bool cqhci_halt(struct mmc_host *mmc, unsigned int timeout)
 /*
  * After halting we expect to be able to use the command line. We interpret the
  * failure to halt to mean the data lines might still be in use (and the upper
- * layers will need to send a STOP command), so we set the timeout based on a
- * generous command timeout.
+ * layers will need to send a STOP command), however failing to halt complicates
+ * the recovery, so set a timeout that would reasonably allow I/O to complete.
  */
-#define CQHCI_START_HALT_TIMEOUT	5
+#define CQHCI_START_HALT_TIMEOUT	500
 
 static void cqhci_recovery_start(struct mmc_host *mmc)
 {
@@ -1075,28 +1075,28 @@ static void cqhci_recovery_finish(struct mmc_host *mmc)
 
 	ok = cqhci_halt(mmc, CQHCI_FINISH_HALT_TIMEOUT);
 
-	if (!cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT))
-		ok = false;
-
 	/*
 	 * The specification contradicts itself, by saying that tasks cannot be
 	 * cleared if CQHCI does not halt, but if CQHCI does not halt, it should
 	 * be disabled/re-enabled, but not to disable before clearing tasks.
 	 * Have a go anyway.
 	 */
-	if (!ok) {
-		pr_debug("%s: cqhci: disable / re-enable\n", mmc_hostname(mmc));
-		cqcfg = cqhci_readl(cq_host, CQHCI_CFG);
-		cqcfg &= ~CQHCI_ENABLE;
-		cqhci_writel(cq_host, cqcfg, CQHCI_CFG);
-		cqcfg |= CQHCI_ENABLE;
-		cqhci_writel(cq_host, cqcfg, CQHCI_CFG);
-		/* Be sure that there are no tasks */
-		ok = cqhci_halt(mmc, CQHCI_FINISH_HALT_TIMEOUT);
-		if (!cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT))
-			ok = false;
-		WARN_ON(!ok);
-	}
+	if (!cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT))
+		ok = false;
+
+	/* Disable to make sure tasks really are cleared */
+	cqcfg = cqhci_readl(cq_host, CQHCI_CFG);
+	cqcfg &= ~CQHCI_ENABLE;
+	cqhci_writel(cq_host, cqcfg, CQHCI_CFG);
+
+	cqcfg = cqhci_readl(cq_host, CQHCI_CFG);
+	cqcfg |= CQHCI_ENABLE;
+	cqhci_writel(cq_host, cqcfg, CQHCI_CFG);
+
+	cqhci_halt(mmc, CQHCI_FINISH_HALT_TIMEOUT);
+
+	if (!ok)
+		cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT);
 
 	cqhci_recover_mrqs(cq_host);
 
diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c
index d8a991b349a8..77911a57b12c 100644
--- a/drivers/mmc/host/sdhci-pci-gli.c
+++ b/drivers/mmc/host/sdhci-pci-gli.c
@@ -1189,6 +1189,32 @@ static void gl9763e_hs400_enhanced_strobe(struct mmc_host *mmc,
 	sdhci_writel(host, val, SDHCI_GLI_9763E_HS400_ES_REG);
 }
 
+static void gl9763e_set_low_power_negotiation(struct sdhci_pci_slot *slot,
+					      bool enable)
+{
+	struct pci_dev *pdev = slot->chip->pdev;
+	u32 value;
+
+	pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value);
+	value &= ~GLI_9763E_VHS_REV;
+	value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_W);
+	pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value);
+
+	pci_read_config_dword(pdev, PCIE_GLI_9763E_CFG, &value);
+
+	if (enable)
+		value &= ~GLI_9763E_CFG_LPSN_DIS;
+	else
+		value |= GLI_9763E_CFG_LPSN_DIS;
+
+	pci_write_config_dword(pdev, PCIE_GLI_9763E_CFG, value);
+
+	pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value);
+	value &= ~GLI_9763E_VHS_REV;
+	value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_R);
+	pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value);
+}
+
 static void sdhci_set_gl9763e_signaling(struct sdhci_host *host,
 					unsigned int timing)
 {
@@ -1297,6 +1323,9 @@ static int gl9763e_add_host(struct sdhci_pci_slot *slot)
 	if (ret)
 		goto cleanup;
 
+	/* Disable LPM negotiation to avoid entering L1 state. */
+	gl9763e_set_low_power_negotiation(slot, false);
+
 	return 0;
 
 cleanup:
@@ -1340,31 +1369,6 @@ static void gli_set_gl9763e(struct sdhci_pci_slot *slot)
 }
 
 #ifdef CONFIG_PM
-static void gl9763e_set_low_power_negotiation(struct sdhci_pci_slot *slot, bool enable)
-{
-	struct pci_dev *pdev = slot->chip->pdev;
-	u32 value;
-
-	pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value);
-	value &= ~GLI_9763E_VHS_REV;
-	value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_W);
-	pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value);
-
-	pci_read_config_dword(pdev, PCIE_GLI_9763E_CFG, &value);
-
-	if (enable)
-		value &= ~GLI_9763E_CFG_LPSN_DIS;
-	else
-		value |= GLI_9763E_CFG_LPSN_DIS;
-
-	pci_write_config_dword(pdev, PCIE_GLI_9763E_CFG, value);
-
-	pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value);
-	value &= ~GLI_9763E_VHS_REV;
-	value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_R);
-	pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value);
-}
-
 static int gl9763e_runtime_suspend(struct sdhci_pci_chip *chip)
 {
 	struct sdhci_pci_slot *slot = chip->slots[0];
diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c
index 6b84ba27e6ab..6b8a57e2d20f 100644
--- a/drivers/mmc/host/sdhci-sprd.c
+++ b/drivers/mmc/host/sdhci-sprd.c
@@ -416,12 +416,33 @@ static void sdhci_sprd_request_done(struct sdhci_host *host,
 	mmc_request_done(host->mmc, mrq);
 }
 
+static void sdhci_sprd_set_power(struct sdhci_host *host, unsigned char mode,
+				 unsigned short vdd)
+{
+	struct mmc_host *mmc = host->mmc;
+
+	switch (mode) {
+	case MMC_POWER_OFF:
+		mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, 0);
+
+		mmc_regulator_disable_vqmmc(mmc);
+		break;
+	case MMC_POWER_ON:
+		mmc_regulator_enable_vqmmc(mmc);
+		break;
+	case MMC_POWER_UP:
+		mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, vdd);
+		break;
+	}
+}
+
 static struct sdhci_ops sdhci_sprd_ops = {
 	.read_l = sdhci_sprd_readl,
 	.write_l = sdhci_sprd_writel,
 	.write_w = sdhci_sprd_writew,
 	.write_b = sdhci_sprd_writeb,
 	.set_clock = sdhci_sprd_set_clock,
+	.set_power = sdhci_sprd_set_power,
 	.get_max_clock = sdhci_sprd_get_max_clock,
 	.get_min_clock = sdhci_sprd_get_min_clock,
 	.set_bus_width = sdhci_set_bus_width,
@@ -823,6 +844,10 @@ static int sdhci_sprd_probe(struct platform_device *pdev)
 	host->caps1 &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_SDR104 |
 			 SDHCI_SUPPORT_DDR50);
 
+	ret = mmc_regulator_get_supply(host->mmc);
+	if (ret)
+		goto pm_runtime_disable;
+
 	ret = sdhci_setup_host(host);
 	if (ret)
 		goto pm_runtime_disable;
diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h
index 19e996a829c9..b54275389f8a 100644
--- a/drivers/net/arcnet/arcdevice.h
+++ b/drivers/net/arcnet/arcdevice.h
@@ -186,6 +186,8 @@ do {									\
 #define ARC_IS_5MBIT    1   /* card default speed is 5MBit */
 #define ARC_CAN_10MBIT  2   /* card uses COM20022, supporting 10MBit,
 				 but default is 2.5MBit. */
+#define ARC_HAS_LED     4   /* card has software controlled LEDs */
+#define ARC_HAS_ROTARY  8   /* card has rotary encoder */
 
 /* information needed to define an encapsulation driver */
 struct ArcProto {
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index c580acb8b1d3..7b5c8bb02f11 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -213,12 +213,13 @@ static int com20020pci_probe(struct pci_dev *pdev,
 		if (!strncmp(ci->name, "EAE PLX-PCI FB2", 15))
 			lp->backplane = 1;
 
-		/* Get the dev_id from the PLX rotary coder */
-		if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15))
-			dev_id_mask = 0x3;
-		dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask;
-
-		snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i);
+		if (ci->flags & ARC_HAS_ROTARY) {
+			/* Get the dev_id from the PLX rotary coder */
+			if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15))
+				dev_id_mask = 0x3;
+			dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask;
+			snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i);
+		}
 
 		if (arcnet_inb(ioaddr, COM20020_REG_R_STATUS) == 0xFF) {
 			pr_err("IO address %Xh is empty!\n", ioaddr);
@@ -230,6 +231,10 @@ static int com20020pci_probe(struct pci_dev *pdev,
 			goto err_free_arcdev;
 		}
 
+		ret = com20020_found(dev, IRQF_SHARED);
+		if (ret)
+			goto err_free_arcdev;
+
 		card = devm_kzalloc(&pdev->dev, sizeof(struct com20020_dev),
 				    GFP_KERNEL);
 		if (!card) {
@@ -239,41 +244,39 @@ static int com20020pci_probe(struct pci_dev *pdev,
 
 		card->index = i;
 		card->pci_priv = priv;
-		card->tx_led.brightness_set = led_tx_set;
-		card->tx_led.default_trigger = devm_kasprintf(&pdev->dev,
-						GFP_KERNEL, "arc%d-%d-tx",
-						dev->dev_id, i);
-		card->tx_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
-						"pci:green:tx:%d-%d",
-						dev->dev_id, i);
-
-		card->tx_led.dev = &dev->dev;
-		card->recon_led.brightness_set = led_recon_set;
-		card->recon_led.default_trigger = devm_kasprintf(&pdev->dev,
-						GFP_KERNEL, "arc%d-%d-recon",
-						dev->dev_id, i);
-		card->recon_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
-						"pci:red:recon:%d-%d",
-						dev->dev_id, i);
-		card->recon_led.dev = &dev->dev;
-		card->dev = dev;
-
-		ret = devm_led_classdev_register(&pdev->dev, &card->tx_led);
-		if (ret)
-			goto err_free_arcdev;
 
-		ret = devm_led_classdev_register(&pdev->dev, &card->recon_led);
-		if (ret)
-			goto err_free_arcdev;
-
-		dev_set_drvdata(&dev->dev, card);
-
-		ret = com20020_found(dev, IRQF_SHARED);
-		if (ret)
-			goto err_free_arcdev;
-
-		devm_arcnet_led_init(dev, dev->dev_id, i);
+		if (ci->flags & ARC_HAS_LED) {
+			card->tx_led.brightness_set = led_tx_set;
+			card->tx_led.default_trigger = devm_kasprintf(&pdev->dev,
+							GFP_KERNEL, "arc%d-%d-tx",
+							dev->dev_id, i);
+			card->tx_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+							"pci:green:tx:%d-%d",
+							dev->dev_id, i);
+
+			card->tx_led.dev = &dev->dev;
+			card->recon_led.brightness_set = led_recon_set;
+			card->recon_led.default_trigger = devm_kasprintf(&pdev->dev,
+							GFP_KERNEL, "arc%d-%d-recon",
+							dev->dev_id, i);
+			card->recon_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+							"pci:red:recon:%d-%d",
+							dev->dev_id, i);
+			card->recon_led.dev = &dev->dev;
+
+			ret = devm_led_classdev_register(&pdev->dev, &card->tx_led);
+			if (ret)
+				goto err_free_arcdev;
+
+			ret = devm_led_classdev_register(&pdev->dev, &card->recon_led);
+			if (ret)
+				goto err_free_arcdev;
+
+			dev_set_drvdata(&dev->dev, card);
+			devm_arcnet_led_init(dev, dev->dev_id, i);
+		}
 
+		card->dev = dev;
 		list_add(&card->list, &priv->list_dev);
 		continue;
 
@@ -329,7 +332,7 @@ static struct com20020_pci_card_info card_info_5mbit = {
 };
 
 static struct com20020_pci_card_info card_info_sohard = {
-	.name = "PLX-PCI",
+	.name = "SOHARD SH ARC-PCI",
 	.devcount = 1,
 	/* SOHARD needs PCI base addr 4 */
 	.chan_map_tbl = {
@@ -364,7 +367,7 @@ static struct com20020_pci_card_info card_info_eae_arc1 = {
 		},
 	},
 	.rotary = 0x0,
-	.flags = ARC_CAN_10MBIT,
+	.flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT,
 };
 
 static struct com20020_pci_card_info card_info_eae_ma1 = {
@@ -396,7 +399,7 @@ static struct com20020_pci_card_info card_info_eae_ma1 = {
 		},
 	},
 	.rotary = 0x0,
-	.flags = ARC_CAN_10MBIT,
+	.flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT,
 };
 
 static struct com20020_pci_card_info card_info_eae_fb2 = {
@@ -421,7 +424,7 @@ static struct com20020_pci_card_info card_info_eae_fb2 = {
 		},
 	},
 	.rotary = 0x0,
-	.flags = ARC_CAN_10MBIT,
+	.flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT,
 };
 
 static const struct pci_device_id com20020pci_id_table[] = {
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 3fed406fb46a..ff4b39601c93 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -2713,10 +2713,18 @@ static int ksz_connect_tag_protocol(struct dsa_switch *ds,
 {
 	struct ksz_tagger_data *tagger_data;
 
-	tagger_data = ksz_tagger_data(ds);
-	tagger_data->xmit_work_fn = ksz_port_deferred_xmit;
-
-	return 0;
+	switch (proto) {
+	case DSA_TAG_PROTO_KSZ8795:
+		return 0;
+	case DSA_TAG_PROTO_KSZ9893:
+	case DSA_TAG_PROTO_KSZ9477:
+	case DSA_TAG_PROTO_LAN937X:
+		tagger_data = ksz_tagger_data(ds);
+		tagger_data->xmit_work_fn = ksz_port_deferred_xmit;
+		return 0;
+	default:
+		return -EPROTONOSUPPORT;
+	}
 }
 
 static int ksz_port_vlan_filtering(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 42b1acaca33a..07a22c74fe81 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -577,6 +577,18 @@ static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
 	config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100;
 }
 
+static void mv88e6351_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+				       struct phylink_config *config)
+{
+	unsigned long *supported = config->supported_interfaces;
+
+	/* Translate the default cmode */
+	mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
+
+	config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 |
+				   MAC_1000FD;
+}
+
 static int mv88e6352_get_port4_serdes_cmode(struct mv88e6xxx_chip *chip)
 {
 	u16 reg, val;
@@ -3880,7 +3892,8 @@ static int mv88e6xxx_port_setup(struct dsa_switch *ds, int port)
 	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
-	if (chip->info->ops->pcs_ops->pcs_init) {
+	if (chip->info->ops->pcs_ops &&
+	    chip->info->ops->pcs_ops->pcs_init) {
 		err = chip->info->ops->pcs_ops->pcs_init(chip, port);
 		if (err)
 			return err;
@@ -3895,7 +3908,8 @@ static void mv88e6xxx_port_teardown(struct dsa_switch *ds, int port)
 
 	mv88e6xxx_teardown_devlink_regions_port(ds, port);
 
-	if (chip->info->ops->pcs_ops->pcs_teardown)
+	if (chip->info->ops->pcs_ops &&
+	    chip->info->ops->pcs_ops->pcs_teardown)
 		chip->info->ops->pcs_ops->pcs_teardown(chip, port);
 }
 
@@ -4340,7 +4354,7 @@ static const struct mv88e6xxx_ops mv88e6171_ops = {
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 	.stu_getnext = mv88e6352_g1_stu_getnext,
 	.stu_loadpurge = mv88e6352_g1_stu_loadpurge,
-	.phylink_get_caps = mv88e6185_phylink_get_caps,
+	.phylink_get_caps = mv88e6351_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6172_ops = {
@@ -4440,7 +4454,7 @@ static const struct mv88e6xxx_ops mv88e6175_ops = {
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 	.stu_getnext = mv88e6352_g1_stu_getnext,
 	.stu_loadpurge = mv88e6352_g1_stu_loadpurge,
-	.phylink_get_caps = mv88e6185_phylink_get_caps,
+	.phylink_get_caps = mv88e6351_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6176_ops = {
@@ -5069,7 +5083,7 @@ static const struct mv88e6xxx_ops mv88e6350_ops = {
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 	.stu_getnext = mv88e6352_g1_stu_getnext,
 	.stu_loadpurge = mv88e6352_g1_stu_loadpurge,
-	.phylink_get_caps = mv88e6185_phylink_get_caps,
+	.phylink_get_caps = mv88e6351_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6351_ops = {
@@ -5117,7 +5131,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
 	.stu_loadpurge = mv88e6352_g1_stu_loadpurge,
 	.avb_ops = &mv88e6352_avb_ops,
 	.ptp_ops = &mv88e6352_ptp_ops,
-	.phylink_get_caps = mv88e6185_phylink_get_caps,
+	.phylink_get_caps = mv88e6351_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6352_ops = {
diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
index 9a8429f5d09c..d758a6c1b226 100644
--- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c
+++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
@@ -465,6 +465,7 @@ mv88e639x_pcs_select(struct mv88e6xxx_chip *chip, int port,
 	case PHY_INTERFACE_MODE_10GBASER:
 	case PHY_INTERFACE_MODE_XAUI:
 	case PHY_INTERFACE_MODE_RXAUI:
+	case PHY_INTERFACE_MODE_USXGMII:
 		return &mpcs->xg_pcs;
 
 	default:
@@ -873,7 +874,8 @@ static int mv88e6393x_xg_pcs_post_config(struct phylink_pcs *pcs,
 	struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs);
 	int err;
 
-	if (interface == PHY_INTERFACE_MODE_10GBASER) {
+	if (interface == PHY_INTERFACE_MODE_10GBASER ||
+	    interface == PHY_INTERFACE_MODE_USXGMII) {
 		err = mv88e6393x_erratum_5_2(mpcs);
 		if (err)
 			return err;
@@ -886,12 +888,37 @@ static int mv88e6393x_xg_pcs_post_config(struct phylink_pcs *pcs,
 	return mv88e639x_xg_pcs_enable(mpcs);
 }
 
+static void mv88e6393x_xg_pcs_get_state(struct phylink_pcs *pcs,
+					struct phylink_link_state *state)
+{
+	struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs);
+	u16 status, lp_status;
+	int err;
+
+	if (state->interface != PHY_INTERFACE_MODE_USXGMII)
+		return mv88e639x_xg_pcs_get_state(pcs, state);
+
+	state->link = false;
+
+	err = mv88e639x_read(mpcs, MV88E6390_USXGMII_PHY_STATUS, &status);
+	err = err ? : mv88e639x_read(mpcs, MV88E6390_USXGMII_LP_STATUS, &lp_status);
+	if (err) {
+		dev_err(mpcs->mdio.dev.parent,
+			"can't read USXGMII status: %pe\n", ERR_PTR(err));
+		return;
+	}
+
+	state->link = !!(status & MDIO_USXGMII_LINK);
+	state->an_complete = state->link;
+	phylink_decode_usxgmii_word(state, lp_status);
+}
+
 static const struct phylink_pcs_ops mv88e6393x_xg_pcs_ops = {
 	.pcs_enable = mv88e6393x_xg_pcs_enable,
 	.pcs_disable = mv88e6393x_xg_pcs_disable,
 	.pcs_pre_config = mv88e6393x_xg_pcs_pre_config,
 	.pcs_post_config = mv88e6393x_xg_pcs_post_config,
-	.pcs_get_state = mv88e639x_xg_pcs_get_state,
+	.pcs_get_state = mv88e6393x_xg_pcs_get_state,
 	.pcs_config = mv88e639x_xg_pcs_config,
 };
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
index 80b44043e6c5..28c9b6f1a54f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
@@ -553,17 +553,17 @@ void aq_ptp_tx_hwtstamp(struct aq_nic_s *aq_nic, u64 timestamp)
 
 /* aq_ptp_rx_hwtstamp - utility function which checks for RX time stamp
  * @adapter: pointer to adapter struct
- * @skb: particular skb to send timestamp with
+ * @shhwtstamps: particular skb_shared_hwtstamps to save timestamp
  *
  * if the timestamp is valid, we convert it into the timecounter ns
  * value, then store that result into the hwtstamps structure which
  * is passed up the network stack
  */
-static void aq_ptp_rx_hwtstamp(struct aq_ptp_s *aq_ptp, struct sk_buff *skb,
+static void aq_ptp_rx_hwtstamp(struct aq_ptp_s *aq_ptp, struct skb_shared_hwtstamps *shhwtstamps,
 			       u64 timestamp)
 {
 	timestamp -= atomic_read(&aq_ptp->offset_ingress);
-	aq_ptp_convert_to_hwtstamp(aq_ptp, skb_hwtstamps(skb), timestamp);
+	aq_ptp_convert_to_hwtstamp(aq_ptp, shhwtstamps, timestamp);
 }
 
 void aq_ptp_hwtstamp_config_get(struct aq_ptp_s *aq_ptp,
@@ -639,7 +639,7 @@ bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring)
 	       &aq_ptp->ptp_rx == ring || &aq_ptp->hwts_rx == ring;
 }
 
-u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p,
+u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct skb_shared_hwtstamps *shhwtstamps, u8 *p,
 		      unsigned int len)
 {
 	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
@@ -648,7 +648,7 @@ u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p,
 						   p, len, &timestamp);
 
 	if (ret > 0)
-		aq_ptp_rx_hwtstamp(aq_ptp, skb, timestamp);
+		aq_ptp_rx_hwtstamp(aq_ptp, shhwtstamps, timestamp);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h
index 28ccb7ca2df9..210b723f2207 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h
@@ -67,7 +67,7 @@ int aq_ptp_hwtstamp_config_set(struct aq_ptp_s *aq_ptp,
 /* Return either ring is belong to PTP or not*/
 bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring);
 
-u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p,
+u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct skb_shared_hwtstamps *shhwtstamps, u8 *p,
 		      unsigned int len);
 
 struct ptp_clock *aq_ptp_get_ptp_clock(struct aq_ptp_s *aq_ptp);
@@ -143,7 +143,7 @@ static inline bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring)
 }
 
 static inline u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic,
-				    struct sk_buff *skb, u8 *p,
+				    struct skb_shared_hwtstamps *shhwtstamps, u8 *p,
 				    unsigned int len)
 {
 	return 0;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 4de22eed099a..694daeaf3e61 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -647,7 +647,7 @@ static int __aq_ring_rx_clean(struct aq_ring_s *self, struct napi_struct *napi,
 		}
 		if (is_ptp_ring)
 			buff->len -=
-				aq_ptp_extract_ts(self->aq_nic, skb,
+				aq_ptp_extract_ts(self->aq_nic, skb_hwtstamps(skb),
 						  aq_buf_vaddr(&buff->rxdata),
 						  buff->len);
 
@@ -742,6 +742,8 @@ static int __aq_ring_xdp_clean(struct aq_ring_s *rx_ring,
 		struct aq_ring_buff_s *buff = &rx_ring->buff_ring[rx_ring->sw_head];
 		bool is_ptp_ring = aq_ptp_ring(rx_ring->aq_nic, rx_ring);
 		struct aq_ring_buff_s *buff_ = NULL;
+		u16 ptp_hwtstamp_len = 0;
+		struct skb_shared_hwtstamps shhwtstamps;
 		struct sk_buff *skb = NULL;
 		unsigned int next_ = 0U;
 		struct xdp_buff xdp;
@@ -810,11 +812,12 @@ static int __aq_ring_xdp_clean(struct aq_ring_s *rx_ring,
 		hard_start = page_address(buff->rxdata.page) +
 			     buff->rxdata.pg_off - rx_ring->page_offset;
 
-		if (is_ptp_ring)
-			buff->len -=
-				aq_ptp_extract_ts(rx_ring->aq_nic, skb,
-						  aq_buf_vaddr(&buff->rxdata),
-						  buff->len);
+		if (is_ptp_ring) {
+			ptp_hwtstamp_len = aq_ptp_extract_ts(rx_ring->aq_nic, &shhwtstamps,
+							     aq_buf_vaddr(&buff->rxdata),
+							     buff->len);
+			buff->len -= ptp_hwtstamp_len;
+		}
 
 		xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
 		xdp_prepare_buff(&xdp, hard_start, rx_ring->page_offset,
@@ -834,6 +837,9 @@ static int __aq_ring_xdp_clean(struct aq_ring_s *rx_ring,
 		if (IS_ERR(skb) || !skb)
 			continue;
 
+		if (ptp_hwtstamp_len > 0)
+			*skb_hwtstamps(skb) = shhwtstamps;
+
 		if (buff->is_vlan)
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
 					       buff->vlan_rx_tag);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 38d89d80b4a9..273c9ba48f09 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -2075,6 +2075,7 @@ destroy_flow_table:
 	rhashtable_destroy(&tc_info->flow_table);
 free_tc_info:
 	kfree(tc_info);
+	bp->tc_info = NULL;
 	return rc;
 }
 
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 48b6191efa56..f52830dfb26a 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6474,6 +6474,14 @@ static void tg3_dump_state(struct tg3 *tp)
 	int i;
 	u32 *regs;
 
+	/* If it is a PCI error, all registers will be 0xffff,
+	 * we don't dump them out, just report the error and return
+	 */
+	if (tp->pdev->error_state != pci_channel_io_normal) {
+		netdev_err(tp->dev, "PCI channel ERROR!\n");
+		return;
+	}
+
 	regs = kzalloc(TG3_REG_BLK_SIZE, GFP_ATOMIC);
 	if (!regs)
 		return;
@@ -11259,7 +11267,8 @@ static void tg3_reset_task(struct work_struct *work)
 	rtnl_lock();
 	tg3_full_lock(tp, 0);
 
-	if (tp->pcierr_recovery || !netif_running(tp->dev)) {
+	if (tp->pcierr_recovery || !netif_running(tp->dev) ||
+	    tp->pdev->error_state != pci_channel_io_normal) {
 		tg3_flag_clear(tp, RESET_TASK_PENDING);
 		tg3_full_unlock(tp);
 		rtnl_unlock();
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 15bab41cee48..888509cf1f21 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -516,8 +516,6 @@ struct sk_buff *dpaa2_eth_alloc_skb(struct dpaa2_eth_priv *priv,
 
 	memcpy(skb->data, fd_vaddr + fd_offset, fd_length);
 
-	dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(fd));
-
 	return skb;
 }
 
@@ -589,6 +587,7 @@ void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
 	struct rtnl_link_stats64 *percpu_stats;
 	struct dpaa2_eth_drv_stats *percpu_extras;
 	struct device *dev = priv->net_dev->dev.parent;
+	bool recycle_rx_buf = false;
 	void *buf_data;
 	u32 xdp_act;
 
@@ -618,6 +617,8 @@ void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
 			dma_unmap_page(dev, addr, priv->rx_buf_size,
 				       DMA_BIDIRECTIONAL);
 			skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr);
+		} else {
+			recycle_rx_buf = true;
 		}
 	} else if (fd_format == dpaa2_fd_sg) {
 		WARN_ON(priv->xdp_prog);
@@ -637,6 +638,9 @@ void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
 		goto err_build_skb;
 
 	dpaa2_eth_receive_skb(priv, ch, fd, vaddr, fq, percpu_stats, skb);
+
+	if (recycle_rx_buf)
+		dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(fd));
 	return;
 
 err_build_skb:
@@ -1073,14 +1077,12 @@ static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv,
 	dma_addr_t addr;
 
 	buffer_start = skb->data - dpaa2_eth_needed_headroom(skb);
-
-	/* If there's enough room to align the FD address, do it.
-	 * It will help hardware optimize accesses.
-	 */
 	aligned_start = PTR_ALIGN(buffer_start - DPAA2_ETH_TX_BUF_ALIGN,
 				  DPAA2_ETH_TX_BUF_ALIGN);
 	if (aligned_start >= skb->head)
 		buffer_start = aligned_start;
+	else
+		return -ENOMEM;
 
 	/* Store a backpointer to the skb at the beginning of the buffer
 	 * (in the private data area) such that we can release it
@@ -4967,6 +4969,8 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
 	if (err)
 		goto err_dl_port_add;
 
+	net_dev->needed_headroom = DPAA2_ETH_SWA_SIZE + DPAA2_ETH_TX_BUF_ALIGN;
+
 	err = register_netdev(net_dev);
 	if (err < 0) {
 		dev_err(dev, "register_netdev() failed\n");
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index bfb6c96c3b2f..834cba8c3a41 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -740,7 +740,7 @@ static inline bool dpaa2_eth_rx_pause_enabled(u64 link_options)
 
 static inline unsigned int dpaa2_eth_needed_headroom(struct sk_buff *skb)
 {
-	unsigned int headroom = DPAA2_ETH_SWA_SIZE;
+	unsigned int headroom = DPAA2_ETH_SWA_SIZE + DPAA2_ETH_TX_BUF_ALIGN;
 
 	/* If we don't have an skb (e.g. XDP buffer), we only need space for
 	 * the software annotation area
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 928d934cb21a..f75668c47935 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -66,6 +66,27 @@ static enum mac_mode hns_get_enet_interface(const struct hns_mac_cb *mac_cb)
 	}
 }
 
+static u32 hns_mac_link_anti_shake(struct mac_driver *mac_ctrl_drv)
+{
+#define HNS_MAC_LINK_WAIT_TIME 5
+#define HNS_MAC_LINK_WAIT_CNT 40
+
+	u32 link_status = 0;
+	int i;
+
+	if (!mac_ctrl_drv->get_link_status)
+		return link_status;
+
+	for (i = 0; i < HNS_MAC_LINK_WAIT_CNT; i++) {
+		msleep(HNS_MAC_LINK_WAIT_TIME);
+		mac_ctrl_drv->get_link_status(mac_ctrl_drv, &link_status);
+		if (!link_status)
+			break;
+	}
+
+	return link_status;
+}
+
 void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status)
 {
 	struct mac_driver *mac_ctrl_drv;
@@ -83,6 +104,14 @@ void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status)
 							       &sfp_prsnt);
 		if (!ret)
 			*link_status = *link_status && sfp_prsnt;
+
+		/* for FIBER port, it may have a fake link up.
+		 * when the link status changes from down to up, we need to do
+		 * anti-shake. the anti-shake time is base on tests.
+		 * only FIBER port need to do this.
+		 */
+		if (*link_status && !mac_cb->link)
+			*link_status = hns_mac_link_anti_shake(mac_ctrl_drv);
 	}
 
 	mac_cb->link = *link_status;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 0900abf5c508..8a713eed4465 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -142,7 +142,8 @@ MODULE_DEVICE_TABLE(acpi, hns_enet_acpi_match);
 
 static void fill_desc(struct hnae_ring *ring, void *priv,
 		      int size, dma_addr_t dma, int frag_end,
-		      int buf_num, enum hns_desc_type type, int mtu)
+		      int buf_num, enum hns_desc_type type, int mtu,
+		      bool is_gso)
 {
 	struct hnae_desc *desc = &ring->desc[ring->next_to_use];
 	struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
@@ -275,6 +276,15 @@ static int hns_nic_maybe_stop_tso(
 	return 0;
 }
 
+static int hns_nic_maybe_stop_tx_v2(struct sk_buff **out_skb, int *bnum,
+				    struct hnae_ring *ring)
+{
+	if (skb_is_gso(*out_skb))
+		return hns_nic_maybe_stop_tso(out_skb, bnum, ring);
+	else
+		return hns_nic_maybe_stop_tx(out_skb, bnum, ring);
+}
+
 static void fill_tso_desc(struct hnae_ring *ring, void *priv,
 			  int size, dma_addr_t dma, int frag_end,
 			  int buf_num, enum hns_desc_type type, int mtu)
@@ -300,6 +310,19 @@ static void fill_tso_desc(struct hnae_ring *ring, void *priv,
 				mtu);
 }
 
+static void fill_desc_v2(struct hnae_ring *ring, void *priv,
+			 int size, dma_addr_t dma, int frag_end,
+			 int buf_num, enum hns_desc_type type, int mtu,
+			 bool is_gso)
+{
+	if (is_gso)
+		fill_tso_desc(ring, priv, size, dma, frag_end, buf_num, type,
+			      mtu);
+	else
+		fill_v2_desc(ring, priv, size, dma, frag_end, buf_num, type,
+			     mtu);
+}
+
 netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev,
 				struct sk_buff *skb,
 				struct hns_nic_ring_data *ring_data)
@@ -313,6 +336,7 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev,
 	int seg_num;
 	dma_addr_t dma;
 	int size, next_to_use;
+	bool is_gso;
 	int i;
 
 	switch (priv->ops.maybe_stop_tx(&skb, &buf_num, ring)) {
@@ -339,8 +363,9 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev,
 		ring->stats.sw_err_cnt++;
 		goto out_err_tx_ok;
 	}
+	is_gso = skb_is_gso(skb);
 	priv->ops.fill_desc(ring, skb, size, dma, seg_num == 1 ? 1 : 0,
-			    buf_num, DESC_TYPE_SKB, ndev->mtu);
+			    buf_num, DESC_TYPE_SKB, ndev->mtu, is_gso);
 
 	/* fill the fragments */
 	for (i = 1; i < seg_num; i++) {
@@ -354,7 +379,7 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev,
 		}
 		priv->ops.fill_desc(ring, skb_frag_page(frag), size, dma,
 				    seg_num - 1 == i ? 1 : 0, buf_num,
-				    DESC_TYPE_PAGE, ndev->mtu);
+				    DESC_TYPE_PAGE, ndev->mtu, is_gso);
 	}
 
 	/*complete translate all packets*/
@@ -1776,15 +1801,6 @@ static int hns_nic_set_features(struct net_device *netdev,
 			netdev_info(netdev, "enet v1 do not support tso!\n");
 		break;
 	default:
-		if (features & (NETIF_F_TSO | NETIF_F_TSO6)) {
-			priv->ops.fill_desc = fill_tso_desc;
-			priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso;
-			/* The chip only support 7*4096 */
-			netif_set_tso_max_size(netdev, 7 * 4096);
-		} else {
-			priv->ops.fill_desc = fill_v2_desc;
-			priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx;
-		}
 		break;
 	}
 	netdev->features = features;
@@ -2159,16 +2175,9 @@ static void hns_nic_set_priv_ops(struct net_device *netdev)
 		priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx;
 	} else {
 		priv->ops.get_rxd_bnum = get_v2rx_desc_bnum;
-		if ((netdev->features & NETIF_F_TSO) ||
-		    (netdev->features & NETIF_F_TSO6)) {
-			priv->ops.fill_desc = fill_tso_desc;
-			priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso;
-			/* This chip only support 7*4096 */
-			netif_set_tso_max_size(netdev, 7 * 4096);
-		} else {
-			priv->ops.fill_desc = fill_v2_desc;
-			priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx;
-		}
+		priv->ops.fill_desc = fill_desc_v2;
+		priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx_v2;
+		netif_set_tso_max_size(netdev, 7 * 4096);
 		/* enable tso when init
 		 * control tso on/off through TSE bit in bd
 		 */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
index ffa9d6573f54..3f3ee032f631 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
@@ -44,7 +44,8 @@ struct hns_nic_ring_data {
 struct hns_nic_ops {
 	void (*fill_desc)(struct hnae_ring *ring, void *priv,
 			  int size, dma_addr_t dma, int frag_end,
-			  int buf_num, enum hns_desc_type type, int mtu);
+			  int buf_num, enum hns_desc_type type, int mtu,
+			  bool is_gso);
 	int (*maybe_stop_tx)(struct sk_buff **out_skb,
 			     int *bnum, struct hnae_ring *ring);
 	void (*get_rxd_bnum)(u32 bnum_flag, int *out_bnum);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f7a332e51524..1ab8dbe2d880 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -16224,7 +16224,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	       I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT;
 	if (val < MAX_FRAME_SIZE_DEFAULT)
 		dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n",
-			 i, val);
+			 pf->hw.port, val);
 
 	/* Add a filter to drop all Flow control frames from any VSI from being
 	 * transmitted. By doing so we stop a malicious VF from sending out
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index 6f236d1a6444..19cbfe554689 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -827,18 +827,10 @@ static int __iavf_set_coalesce(struct net_device *netdev,
 	struct iavf_adapter *adapter = netdev_priv(netdev);
 	int i;
 
-	if (ec->rx_coalesce_usecs == 0) {
-		if (ec->use_adaptive_rx_coalesce)
-			netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
-	} else if ((ec->rx_coalesce_usecs < IAVF_MIN_ITR) ||
-		   (ec->rx_coalesce_usecs > IAVF_MAX_ITR)) {
+	if (ec->rx_coalesce_usecs > IAVF_MAX_ITR) {
 		netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
 		return -EINVAL;
-	} else if (ec->tx_coalesce_usecs == 0) {
-		if (ec->use_adaptive_tx_coalesce)
-			netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
-	} else if ((ec->tx_coalesce_usecs < IAVF_MIN_ITR) ||
-		   (ec->tx_coalesce_usecs > IAVF_MAX_ITR)) {
+	} else if (ec->tx_coalesce_usecs > IAVF_MAX_ITR) {
 		netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
index 7e6ee32d19b6..10ba36602c0c 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
@@ -15,7 +15,6 @@
  */
 #define IAVF_ITR_DYNAMIC	0x8000	/* use top bit as a flag */
 #define IAVF_ITR_MASK		0x1FFE	/* mask for ITR register value */
-#define IAVF_MIN_ITR		     2	/* reg uses 2 usec resolution */
 #define IAVF_ITR_100K		    10	/* all values below must be even */
 #define IAVF_ITR_50K		    20
 #define IAVF_ITR_20K		    50
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
index cd065ec48c87..280994ee5933 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -570,6 +570,50 @@ resume_traffic:
 }
 
 /**
+ * ice_lag_build_netdev_list - populate the lag struct's netdev list
+ * @lag: local lag struct
+ * @ndlist: pointer to netdev list to populate
+ */
+static void ice_lag_build_netdev_list(struct ice_lag *lag,
+				      struct ice_lag_netdev_list *ndlist)
+{
+	struct ice_lag_netdev_list *nl;
+	struct net_device *tmp_nd;
+
+	INIT_LIST_HEAD(&ndlist->node);
+	rcu_read_lock();
+	for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) {
+		nl = kzalloc(sizeof(*nl), GFP_ATOMIC);
+		if (!nl)
+			break;
+
+		nl->netdev = tmp_nd;
+		list_add(&nl->node, &ndlist->node);
+	}
+	rcu_read_unlock();
+	lag->netdev_head = &ndlist->node;
+}
+
+/**
+ * ice_lag_destroy_netdev_list - free lag struct's netdev list
+ * @lag: pointer to local lag struct
+ * @ndlist: pointer to lag struct netdev list
+ */
+static void ice_lag_destroy_netdev_list(struct ice_lag *lag,
+					struct ice_lag_netdev_list *ndlist)
+{
+	struct ice_lag_netdev_list *entry, *n;
+
+	rcu_read_lock();
+	list_for_each_entry_safe(entry, n, &ndlist->node, node) {
+		list_del(&entry->node);
+		kfree(entry);
+	}
+	rcu_read_unlock();
+	lag->netdev_head = NULL;
+}
+
+/**
  * ice_lag_move_single_vf_nodes - Move Tx scheduling nodes for single VF
  * @lag: primary interface LAG struct
  * @oldport: lport of previous interface
@@ -597,7 +641,6 @@ ice_lag_move_single_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport,
 void ice_lag_move_new_vf_nodes(struct ice_vf *vf)
 {
 	struct ice_lag_netdev_list ndlist;
-	struct list_head *tmp, *n;
 	u8 pri_port, act_port;
 	struct ice_lag *lag;
 	struct ice_vsi *vsi;
@@ -621,38 +664,15 @@ void ice_lag_move_new_vf_nodes(struct ice_vf *vf)
 	pri_port = pf->hw.port_info->lport;
 	act_port = lag->active_port;
 
-	if (lag->upper_netdev) {
-		struct ice_lag_netdev_list *nl;
-		struct net_device *tmp_nd;
-
-		INIT_LIST_HEAD(&ndlist.node);
-		rcu_read_lock();
-		for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) {
-			nl = kzalloc(sizeof(*nl), GFP_ATOMIC);
-			if (!nl)
-				break;
-
-			nl->netdev = tmp_nd;
-			list_add(&nl->node, &ndlist.node);
-		}
-		rcu_read_unlock();
-	}
-
-	lag->netdev_head = &ndlist.node;
+	if (lag->upper_netdev)
+		ice_lag_build_netdev_list(lag, &ndlist);
 
 	if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) &&
 	    lag->bonded && lag->primary && pri_port != act_port &&
 	    !list_empty(lag->netdev_head))
 		ice_lag_move_single_vf_nodes(lag, pri_port, act_port, vsi->idx);
 
-	list_for_each_safe(tmp, n, &ndlist.node) {
-		struct ice_lag_netdev_list *entry;
-
-		entry = list_entry(tmp, struct ice_lag_netdev_list, node);
-		list_del(&entry->node);
-		kfree(entry);
-	}
-	lag->netdev_head = NULL;
+	ice_lag_destroy_netdev_list(lag, &ndlist);
 
 new_vf_unlock:
 	mutex_unlock(&pf->lag_mutex);
@@ -679,6 +699,29 @@ static void ice_lag_move_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport)
 			ice_lag_move_single_vf_nodes(lag, oldport, newport, i);
 }
 
+/**
+ * ice_lag_move_vf_nodes_cfg - move vf nodes outside LAG netdev event context
+ * @lag: local lag struct
+ * @src_prt: lport value for source port
+ * @dst_prt: lport value for destination port
+ *
+ * This function is used to move nodes during an out-of-netdev-event situation,
+ * primarily when the driver needs to reconfigure or recreate resources.
+ *
+ * Must be called while holding the lag_mutex to avoid lag events from
+ * processing while out-of-sync moves are happening.  Also, paired moves,
+ * such as used in a reset flow, should both be called under the same mutex
+ * lock to avoid changes between start of reset and end of reset.
+ */
+void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt)
+{
+	struct ice_lag_netdev_list ndlist;
+
+	ice_lag_build_netdev_list(lag, &ndlist);
+	ice_lag_move_vf_nodes(lag, src_prt, dst_prt);
+	ice_lag_destroy_netdev_list(lag, &ndlist);
+}
+
 #define ICE_LAG_SRIOV_CP_RECIPE		10
 #define ICE_LAG_SRIOV_TRAIN_PKT_LEN	16
 
@@ -2051,7 +2094,6 @@ void ice_lag_rebuild(struct ice_pf *pf)
 {
 	struct ice_lag_netdev_list ndlist;
 	struct ice_lag *lag, *prim_lag;
-	struct list_head *tmp, *n;
 	u8 act_port, loc_port;
 
 	if (!pf->lag || !pf->lag->bonded)
@@ -2063,21 +2105,7 @@ void ice_lag_rebuild(struct ice_pf *pf)
 	if (lag->primary) {
 		prim_lag = lag;
 	} else {
-		struct ice_lag_netdev_list *nl;
-		struct net_device *tmp_nd;
-
-		INIT_LIST_HEAD(&ndlist.node);
-		rcu_read_lock();
-		for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) {
-			nl = kzalloc(sizeof(*nl), GFP_ATOMIC);
-			if (!nl)
-				break;
-
-			nl->netdev = tmp_nd;
-			list_add(&nl->node, &ndlist.node);
-		}
-		rcu_read_unlock();
-		lag->netdev_head = &ndlist.node;
+		ice_lag_build_netdev_list(lag, &ndlist);
 		prim_lag = ice_lag_find_primary(lag);
 	}
 
@@ -2107,13 +2135,7 @@ void ice_lag_rebuild(struct ice_pf *pf)
 
 	ice_clear_rdma_cap(pf);
 lag_rebuild_out:
-	list_for_each_safe(tmp, n, &ndlist.node) {
-		struct ice_lag_netdev_list *entry;
-
-		entry = list_entry(tmp, struct ice_lag_netdev_list, node);
-		list_del(&entry->node);
-		kfree(entry);
-	}
+	ice_lag_destroy_netdev_list(lag, &ndlist);
 	mutex_unlock(&pf->lag_mutex);
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h
index 9557e8605a07..ede833dfa658 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.h
+++ b/drivers/net/ethernet/intel/ice/ice_lag.h
@@ -65,4 +65,5 @@ int ice_init_lag(struct ice_pf *pf);
 void ice_deinit_lag(struct ice_pf *pf);
 void ice_lag_rebuild(struct ice_pf *pf);
 bool ice_lag_is_switchdev_running(struct ice_pf *pf);
+void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt);
 #endif /* _ICE_LAG_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index 2a5e6616cc0a..e1494f24f661 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -374,16 +374,11 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
  */
 int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector)
 {
-	struct ice_pf *pf;
-
 	if (!vf || !q_vector)
 		return -EINVAL;
 
-	pf = vf->pf;
-
 	/* always add one to account for the OICR being the first MSIX */
-	return pf->sriov_base_vector + pf->vfs.num_msix_per * vf->vf_id +
-		q_vector->v_idx + 1;
+	return vf->first_vector_idx + q_vector->v_idx + 1;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
index aca1f2ea5034..b7ae09952156 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -829,12 +829,16 @@ static void ice_notify_vf_reset(struct ice_vf *vf)
 int ice_reset_vf(struct ice_vf *vf, u32 flags)
 {
 	struct ice_pf *pf = vf->pf;
+	struct ice_lag *lag;
 	struct ice_vsi *vsi;
+	u8 act_prt, pri_prt;
 	struct device *dev;
 	int err = 0;
 	bool rsd;
 
 	dev = ice_pf_to_dev(pf);
+	act_prt = ICE_LAG_INVALID_PORT;
+	pri_prt = pf->hw.port_info->lport;
 
 	if (flags & ICE_VF_RESET_NOTIFY)
 		ice_notify_vf_reset(vf);
@@ -845,6 +849,17 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
 		return 0;
 	}
 
+	lag = pf->lag;
+	mutex_lock(&pf->lag_mutex);
+	if (lag && lag->bonded && lag->primary) {
+		act_prt = lag->active_port;
+		if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT &&
+		    lag->upper_netdev)
+			ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt);
+		else
+			act_prt = ICE_LAG_INVALID_PORT;
+	}
+
 	if (flags & ICE_VF_RESET_LOCK)
 		mutex_lock(&vf->cfg_lock);
 	else
@@ -937,6 +952,11 @@ out_unlock:
 	if (flags & ICE_VF_RESET_LOCK)
 		mutex_unlock(&vf->cfg_lock);
 
+	if (lag && lag->bonded && lag->primary &&
+	    act_prt != ICE_LAG_INVALID_PORT)
+		ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt);
+	mutex_unlock(&pf->lag_mutex);
+
 	return err;
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
index d7b10dc67f03..80dc4bcdd3a4 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
@@ -32,7 +32,6 @@ static void ice_port_vlan_on(struct ice_vsi *vsi)
 		/* setup outer VLAN ops */
 		vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
 		vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
-		vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
 
 		/* setup inner VLAN ops */
 		vlan_ops = &vsi->inner_vlan_ops;
@@ -47,8 +46,13 @@ static void ice_port_vlan_on(struct ice_vsi *vsi)
 
 		vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan;
 		vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan;
-		vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan;
 	}
+
+	/* all Rx traffic should be in the domain of the assigned port VLAN,
+	 * so prevent disabling Rx VLAN filtering
+	 */
+	vlan_ops->dis_rx_filtering = noop_vlan;
+
 	vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering;
 }
 
@@ -77,6 +81,8 @@ static void ice_port_vlan_off(struct ice_vsi *vsi)
 		vlan_ops->del_vlan = ice_vsi_del_vlan;
 	}
 
+	vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+
 	if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags))
 		vlan_ops->ena_rx_filtering = noop_vlan;
 	else
@@ -141,7 +147,6 @@ void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi)
 		&vsi->outer_vlan_ops : &vsi->inner_vlan_ops;
 
 	vlan_ops->add_vlan = ice_vsi_add_vlan;
-	vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
 	vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering;
 	vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering;
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index cdf17b1e2f25..1c7b4ded948b 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -1523,7 +1523,6 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
 	u16 num_q_vectors_mapped, vsi_id, vector_id;
 	struct virtchnl_irq_map_info *irqmap_info;
 	struct virtchnl_vector_map *map;
-	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
 	int i;
 
@@ -1535,7 +1534,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
 	 * there is actually at least a single VF queue vector mapped
 	 */
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
-	    pf->vfs.num_msix_per < num_q_vectors_mapped ||
+	    vf->num_msix < num_q_vectors_mapped ||
 	    !num_q_vectors_mapped) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
@@ -1557,7 +1556,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
 		/* vector_id is always 0-based for each VF, and can never be
 		 * larger than or equal to the max allowed interrupts per VF
 		 */
-		if (!(vector_id < pf->vfs.num_msix_per) ||
+		if (!(vector_id < vf->num_msix) ||
 		    !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
 		    (!vector_id && (map->rxq_map || map->txq_map))) {
 			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -1603,9 +1602,24 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 	    (struct virtchnl_vsi_queue_config_info *)msg;
 	struct virtchnl_queue_pair_info *qpi;
 	struct ice_pf *pf = vf->pf;
+	struct ice_lag *lag;
 	struct ice_vsi *vsi;
+	u8 act_prt, pri_prt;
 	int i = -1, q_idx;
 
+	lag = pf->lag;
+	mutex_lock(&pf->lag_mutex);
+	act_prt = ICE_LAG_INVALID_PORT;
+	pri_prt = pf->hw.port_info->lport;
+	if (lag && lag->bonded && lag->primary) {
+		act_prt = lag->active_port;
+		if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT &&
+		    lag->upper_netdev)
+			ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt);
+		else
+			act_prt = ICE_LAG_INVALID_PORT;
+	}
+
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
 		goto error_param;
 
@@ -1729,6 +1743,11 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 		}
 	}
 
+	if (lag && lag->bonded && lag->primary &&
+	    act_prt != ICE_LAG_INVALID_PORT)
+		ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt);
+	mutex_unlock(&pf->lag_mutex);
+
 	/* send the response to the VF */
 	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
 				     VIRTCHNL_STATUS_SUCCESS, NULL, 0);
@@ -1743,6 +1762,11 @@ error_param:
 				vf->vf_id, i);
 	}
 
+	if (lag && lag->bonded && lag->primary &&
+	    act_prt != ICE_LAG_INVALID_PORT)
+		ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt);
+	mutex_unlock(&pf->lag_mutex);
+
 	ice_lag_move_new_vf_nodes(vf);
 
 	/* send the response to the VF */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 6845556581c3..5df42634ceb8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -1945,7 +1945,7 @@ struct mcs_hw_info {
 	u8 tcam_entries;	/* RX/TX Tcam entries per mcs block */
 	u8 secy_entries;	/* RX/TX SECY entries per mcs block */
 	u8 sc_entries;		/* RX/TX SC CAM entries per mcs block */
-	u8 sa_entries;		/* PN table entries = SA entries */
+	u16 sa_entries;		/* PN table entries = SA entries */
 	u64 rsvd[16];
 };
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c
index c43f19dfbd74..c1775bd01c2b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c
@@ -117,7 +117,7 @@ void mcs_get_rx_secy_stats(struct mcs *mcs, struct mcs_secy_stats *stats, int id
 	reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYTAGGEDCTLX(id);
 	stats->pkt_tagged_ctl_cnt = mcs_reg_read(mcs, reg);
 
-	reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDORNOTAGX(id);
+	reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDX(id);
 	stats->pkt_untaged_cnt = mcs_reg_read(mcs, reg);
 
 	reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYCTLX(id);
@@ -215,7 +215,7 @@ void mcs_get_sc_stats(struct mcs *mcs, struct mcs_sc_stats *stats,
 		reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSCNOTVALIDX(id);
 		stats->pkt_notvalid_cnt = mcs_reg_read(mcs, reg);
 
-		reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDOROKX(id);
+		reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDX(id);
 		stats->pkt_unchecked_cnt = mcs_reg_read(mcs, reg);
 
 		if (mcs->hw->mcs_blks > 1) {
@@ -1219,6 +1219,17 @@ struct mcs *mcs_get_pdata(int mcs_id)
 	return NULL;
 }
 
+bool is_mcs_bypass(int mcs_id)
+{
+	struct mcs *mcs_dev;
+
+	list_for_each_entry(mcs_dev, &mcs_list, mcs_list) {
+		if (mcs_dev->mcs_id == mcs_id)
+			return mcs_dev->bypass;
+	}
+	return true;
+}
+
 void mcs_set_port_cfg(struct mcs *mcs, struct mcs_port_cfg_set_req *req)
 {
 	u64 val = 0;
@@ -1436,7 +1447,7 @@ static int mcs_x2p_calibration(struct mcs *mcs)
 	return err;
 }
 
-static void mcs_set_external_bypass(struct mcs *mcs, u8 bypass)
+static void mcs_set_external_bypass(struct mcs *mcs, bool bypass)
 {
 	u64 val;
 
@@ -1447,6 +1458,7 @@ static void mcs_set_external_bypass(struct mcs *mcs, u8 bypass)
 	else
 		val &= ~BIT_ULL(6);
 	mcs_reg_write(mcs, MCSX_MIL_GLOBAL, val);
+	mcs->bypass = bypass;
 }
 
 static void mcs_global_cfg(struct mcs *mcs)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.h b/drivers/net/ethernet/marvell/octeontx2/af/mcs.h
index 0f89dcb76465..f927cc61dfd2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.h
@@ -149,6 +149,7 @@ struct mcs {
 	u16			num_vec;
 	void			*rvu;
 	u16			*tx_sa_active;
+	bool                      bypass;
 };
 
 struct mcs_ops {
@@ -206,6 +207,7 @@ void mcs_get_custom_tag_cfg(struct mcs *mcs, struct mcs_custom_tag_cfg_get_req *
 int mcs_alloc_ctrlpktrule(struct rsrc_bmap *rsrc, u16 *pf_map, u16 offset, u16 pcifunc);
 int mcs_free_ctrlpktrule(struct mcs *mcs, struct mcs_free_ctrl_pkt_rule_req *req);
 int mcs_ctrlpktrule_write(struct mcs *mcs, struct mcs_ctrl_pkt_rule_write_req *req);
+bool is_mcs_bypass(int mcs_id);
 
 /* CN10K-B APIs */
 void cn10kb_mcs_set_hw_capabilities(struct mcs *mcs);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h
index f3ab01fc363c..f4c6de89002c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h
@@ -810,14 +810,37 @@
 		offset = 0x9d8ull;			\
 	offset; })
 
+#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDX(a) ({	\
+	u64 offset;					\
+							\
+	offset = 0xee80ull;				\
+	if (mcs->hw->mcs_blks > 1)			\
+		offset = 0xe818ull;			\
+	offset += (a) * 0x8ull;				\
+	offset; })
+
+#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDX(a) ({	\
+	u64 offset;					\
+							\
+	offset = 0xa680ull;				\
+	if (mcs->hw->mcs_blks > 1)			\
+		offset = 0xd018ull;			\
+	offset += (a) * 0x8ull;				\
+	offset; })
+
+#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCLATEORDELAYEDX(a)	({	\
+	u64 offset;						\
+								\
+	offset = 0xf680ull;					\
+	if (mcs->hw->mcs_blks > 1)				\
+		offset = 0xe018ull;				\
+	offset += (a) * 0x8ull;					\
+	offset; })
+
 #define MCSX_CSE_RX_MEM_SLAVE_INOCTETSSCDECRYPTEDX(a)	(0xe680ull + (a) * 0x8ull)
 #define MCSX_CSE_RX_MEM_SLAVE_INOCTETSSCVALIDATEX(a)	(0xde80ull + (a) * 0x8ull)
-#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDORNOTAGX(a)	(0xa680ull + (a) * 0x8ull)
 #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYNOTAGX(a)	(0xd218 + (a) * 0x8ull)
-#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDX(a)	(0xd018ull + (a) * 0x8ull)
-#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDOROKX(a)	(0xee80ull + (a) * 0x8ull)
 #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYCTLX(a)		(0xb680ull + (a) * 0x8ull)
-#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCLATEORDELAYEDX(a) (0xf680ull + (a) * 0x8ull)
 #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSAINVALIDX(a)	(0x12680ull + (a) * 0x8ull)
 #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSANOTUSINGSAERRORX(a) (0x15680ull + (a) * 0x8ull)
 #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSANOTVALIDX(a)	(0x13680ull + (a) * 0x8ull)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 22c395c7d040..731bb82b577c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -2631,6 +2631,9 @@ static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc)
 	rvu_npc_free_mcam_entries(rvu, pcifunc, -1);
 	rvu_mac_reset(rvu, pcifunc);
 
+	if (rvu->mcs_blk_cnt)
+		rvu_mcs_flr_handler(rvu, pcifunc);
+
 	mutex_unlock(&rvu->flr_lock);
 }
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index c4d999ef5ab4..cce2806aaa50 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -345,6 +345,7 @@ struct nix_hw {
 	struct nix_txvlan txvlan;
 	struct nix_ipolicer *ipolicer;
 	u64    *tx_credits;
+	u8	cc_mcs_cnt;
 };
 
 /* RVU block's capabilities or functionality,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
index c70932625d0d..62780d8b4f9a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
@@ -1087,7 +1087,7 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl)
 
 	rvu_dl->devlink_wq = create_workqueue("rvu_devlink_wq");
 	if (!rvu_dl->devlink_wq)
-		goto err;
+		return -ENOMEM;
 
 	INIT_WORK(&rvu_reporters->intr_work, rvu_npa_intr_work);
 	INIT_WORK(&rvu_reporters->err_work, rvu_npa_err_work);
@@ -1095,9 +1095,6 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl)
 	INIT_WORK(&rvu_reporters->ras_work, rvu_npa_ras_work);
 
 	return 0;
-err:
-	rvu_npa_health_reporters_destroy(rvu_dl);
-	return -ENOMEM;
 }
 
 static int rvu_npa_health_reporters_create(struct rvu_devlink *rvu_dl)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 23c2f2ed2fb8..4227ebb4a758 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -12,6 +12,7 @@
 #include "rvu_reg.h"
 #include "rvu.h"
 #include "npc.h"
+#include "mcs.h"
 #include "cgx.h"
 #include "lmac_common.h"
 #include "rvu_npc_hash.h"
@@ -4389,6 +4390,12 @@ static void nix_link_config(struct rvu *rvu, int blkaddr,
 			    SDP_HW_MAX_FRS << 16 | NIC_HW_MIN_FRS);
 	}
 
+	/* Get MCS external bypass status for CN10K-B */
+	if (mcs_get_blkcnt() == 1) {
+		/* Adjust for 2 credits when external bypass is disabled */
+		nix_hw->cc_mcs_cnt = is_mcs_bypass(0) ? 0 : 2;
+	}
+
 	/* Set credits for Tx links assuming max packet length allowed.
 	 * This will be reconfigured based on MTU set for PF/VF.
 	 */
@@ -4412,6 +4419,7 @@ static void nix_link_config(struct rvu *rvu, int blkaddr,
 			tx_credits = (lmac_fifo_len - lmac_max_frs) / 16;
 			/* Enable credits and set credit pkt count to max allowed */
 			cfg =  (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1);
+			cfg |= FIELD_PREP(NIX_AF_LINKX_MCS_CNT_MASK, nix_hw->cc_mcs_cnt);
 
 			link = iter + slink;
 			nix_hw->tx_credits[link] = tx_credits;
@@ -5505,6 +5513,8 @@ int rvu_mbox_handler_nix_bandprof_free(struct rvu *rvu,
 
 		ipolicer = &nix_hw->ipolicer[layer];
 		for (idx = 0; idx < req->prof_count[layer]; idx++) {
+			if (idx == MAX_BANDPROF_PER_PFFUNC)
+				break;
 			prof_idx = req->prof_idx[layer][idx];
 			if (prof_idx >= ipolicer->band_prof.max ||
 			    ipolicer->pfvf_map[prof_idx] != pcifunc)
@@ -5518,8 +5528,6 @@ int rvu_mbox_handler_nix_bandprof_free(struct rvu *rvu,
 			ipolicer->pfvf_map[prof_idx] = 0x00;
 			ipolicer->match_id[prof_idx] = 0;
 			rvu_free_rsrc(&ipolicer->band_prof, prof_idx);
-			if (idx == MAX_BANDPROF_PER_PFFUNC)
-				break;
 		}
 	}
 	mutex_unlock(&rvu->rsrc_lock);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 16cfc802e348..f65805860c8d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -389,7 +389,13 @@ static u64 npc_get_default_entry_action(struct rvu *rvu, struct npc_mcam *mcam,
 	int bank, nixlf, index;
 
 	/* get ucast entry rule entry index */
-	nix_get_nixlf(rvu, pf_func, &nixlf, NULL);
+	if (nix_get_nixlf(rvu, pf_func, &nixlf, NULL)) {
+		dev_err(rvu->dev, "%s: nixlf not attached to pcifunc:0x%x\n",
+			__func__, pf_func);
+		/* Action 0 is drop */
+		return 0;
+	}
+
 	index = npc_get_nixlf_mcam_index(mcam, pf_func, nixlf,
 					 NIXLF_UCAST_ENTRY);
 	bank = npc_get_bank(mcam, index);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c
index b3150f053291..d46ac29adb96 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c
@@ -31,8 +31,8 @@ static struct hw_reg_map txsch_reg_map[NIX_TXSCH_LVL_CNT] = {
 	{NIX_TXSCH_LVL_TL4, 3, 0xFFFF, {{0x0B00, 0x0B08}, {0x0B10, 0x0B18},
 			      {0x1200, 0x12E0} } },
 	{NIX_TXSCH_LVL_TL3, 4, 0xFFFF, {{0x1000, 0x10E0}, {0x1600, 0x1608},
-			      {0x1610, 0x1618}, {0x1700, 0x17B0} } },
-	{NIX_TXSCH_LVL_TL2, 2, 0xFFFF, {{0x0E00, 0x0EE0}, {0x1700, 0x17B0} } },
+			      {0x1610, 0x1618}, {0x1700, 0x17C8} } },
+	{NIX_TXSCH_LVL_TL2, 2, 0xFFFF, {{0x0E00, 0x0EE0}, {0x1700, 0x17C8} } },
 	{NIX_TXSCH_LVL_TL1, 1, 0xFFFF, {{0x0C00, 0x0D98} } },
 };
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
index b42e631e52d0..18c1c9f361cc 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
@@ -437,6 +437,7 @@
 
 #define NIX_AF_LINKX_BASE_MASK		GENMASK_ULL(11, 0)
 #define NIX_AF_LINKX_RANGE_MASK		GENMASK_ULL(19, 16)
+#define NIX_AF_LINKX_MCS_CNT_MASK	GENMASK_ULL(33, 32)
 
 /* SSO */
 #define SSO_AF_CONST			(0x1000)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
index a4a258da8dd5..c1c99d7054f8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
@@ -450,6 +450,9 @@ int cn10k_set_ipolicer_rate(struct otx2_nic *pfvf, u16 profile,
 	aq->prof.pebs_mantissa = 0;
 	aq->prof_mask.pebs_mantissa = 0xFF;
 
+	aq->prof.hl_en = 0;
+	aq->prof_mask.hl_en = 1;
+
 	/* Fill AQ info */
 	aq->qidx = profile;
 	aq->ctype = NIX_AQ_CTYPE_BANDPROF;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index e7c69b57147e..06910307085e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -1070,6 +1070,8 @@ int otx2_init_tc(struct otx2_nic *nic);
 void otx2_shutdown_tc(struct otx2_nic *nic);
 int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type,
 		  void *type_data);
+void otx2_tc_apply_ingress_police_rules(struct otx2_nic *nic);
+
 /* CGX/RPM DMAC filters support */
 int otx2_dmacflt_get_max_cnt(struct otx2_nic *pf);
 int otx2_dmacflt_add(struct otx2_nic *pf, const u8 *mac, u32 bit_pos);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index 9efcec549834..53f6258a973c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -334,9 +334,12 @@ static void otx2_get_pauseparam(struct net_device *netdev,
 	if (is_otx2_lbkvf(pfvf->pdev))
 		return;
 
+	mutex_lock(&pfvf->mbox.lock);
 	req = otx2_mbox_alloc_msg_cgx_cfg_pause_frm(&pfvf->mbox);
-	if (!req)
+	if (!req) {
+		mutex_unlock(&pfvf->mbox.lock);
 		return;
+	}
 
 	if (!otx2_sync_mbox_msg(&pfvf->mbox)) {
 		rsp = (struct cgx_pause_frm_cfg *)
@@ -344,6 +347,7 @@ static void otx2_get_pauseparam(struct net_device *netdev,
 		pause->rx_pause = rsp->rx_pause;
 		pause->tx_pause = rsp->tx_pause;
 	}
+	mutex_unlock(&pfvf->mbox.lock);
 }
 
 static int otx2_set_pauseparam(struct net_device *netdev,
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index ba95ac913274..0c17ebdda148 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -566,7 +566,9 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq)
 		otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(1), intr);
 		otx2_queue_work(mbox, pf->mbox_pfvf_wq, 64, vfs, intr,
 				TYPE_PFVF);
-		vfs -= 64;
+		if (intr)
+			trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr);
+		vfs = 64;
 	}
 
 	intr = otx2_read64(pf, RVU_PF_VFPF_MBOX_INTX(0));
@@ -574,7 +576,8 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq)
 
 	otx2_queue_work(mbox, pf->mbox_pfvf_wq, 0, vfs, intr, TYPE_PFVF);
 
-	trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr);
+	if (intr)
+		trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr);
 
 	return IRQ_HANDLED;
 }
@@ -1685,6 +1688,14 @@ static void otx2_do_set_rx_mode(struct otx2_nic *pf)
 	mutex_unlock(&pf->mbox.lock);
 }
 
+static void otx2_set_irq_coalesce(struct otx2_nic *pfvf)
+{
+	int cint;
+
+	for (cint = 0; cint < pfvf->hw.cint_cnt; cint++)
+		otx2_config_irq_coalescing(pfvf, cint);
+}
+
 static void otx2_dim_work(struct work_struct *w)
 {
 	struct dim_cq_moder cur_moder;
@@ -1700,6 +1711,7 @@ static void otx2_dim_work(struct work_struct *w)
 		CQ_TIMER_THRESH_MAX : cur_moder.usec;
 	pfvf->hw.cq_ecount_wait = (cur_moder.pkts > NAPI_POLL_WEIGHT) ?
 		NAPI_POLL_WEIGHT : cur_moder.pkts;
+	otx2_set_irq_coalesce(pfvf);
 	dim->state = DIM_START_MEASURE;
 }
 
@@ -1870,6 +1882,8 @@ int otx2_open(struct net_device *netdev)
 	if (pf->flags & OTX2_FLAG_DMACFLTR_SUPPORT)
 		otx2_dmacflt_reinstall_flows(pf);
 
+	otx2_tc_apply_ingress_police_rules(pf);
+
 	err = otx2_rxtx_enable(pf, true);
 	/* If a mbox communication error happens at this point then interface
 	 * will end up in a state such that it is in down state but hardware
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 8a5e3987a482..db1e0e0e812d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -47,6 +47,9 @@ struct otx2_tc_flow {
 	bool				is_act_police;
 	u32				prio;
 	struct npc_install_flow_req	req;
+	u64				rate;
+	u32				burst;
+	bool				is_pps;
 };
 
 static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst,
@@ -284,21 +287,10 @@ static int otx2_tc_egress_matchall_delete(struct otx2_nic *nic,
 	return err;
 }
 
-static int otx2_tc_act_set_police(struct otx2_nic *nic,
-				  struct otx2_tc_flow *node,
-				  struct flow_cls_offload *f,
-				  u64 rate, u32 burst, u32 mark,
-				  struct npc_install_flow_req *req, bool pps)
+static int otx2_tc_act_set_hw_police(struct otx2_nic *nic,
+				     struct otx2_tc_flow *node)
 {
-	struct netlink_ext_ack *extack = f->common.extack;
-	struct otx2_hw *hw = &nic->hw;
-	int rq_idx, rc;
-
-	rq_idx = find_first_zero_bit(&nic->rq_bmap, hw->rx_queues);
-	if (rq_idx >= hw->rx_queues) {
-		NL_SET_ERR_MSG_MOD(extack, "Police action rules exceeded");
-		return -EINVAL;
-	}
+	int rc;
 
 	mutex_lock(&nic->mbox.lock);
 
@@ -308,23 +300,17 @@ static int otx2_tc_act_set_police(struct otx2_nic *nic,
 		return rc;
 	}
 
-	rc = cn10k_set_ipolicer_rate(nic, node->leaf_profile, burst, rate, pps);
+	rc = cn10k_set_ipolicer_rate(nic, node->leaf_profile,
+				     node->burst, node->rate, node->is_pps);
 	if (rc)
 		goto free_leaf;
 
-	rc = cn10k_map_unmap_rq_policer(nic, rq_idx, node->leaf_profile, true);
+	rc = cn10k_map_unmap_rq_policer(nic, node->rq, node->leaf_profile, true);
 	if (rc)
 		goto free_leaf;
 
 	mutex_unlock(&nic->mbox.lock);
 
-	req->match_id = mark & 0xFFFFULL;
-	req->index = rq_idx;
-	req->op = NIX_RX_ACTIONOP_UCAST;
-	set_bit(rq_idx, &nic->rq_bmap);
-	node->is_act_police = true;
-	node->rq = rq_idx;
-
 	return 0;
 
 free_leaf:
@@ -336,6 +322,39 @@ free_leaf:
 	return rc;
 }
 
+static int otx2_tc_act_set_police(struct otx2_nic *nic,
+				  struct otx2_tc_flow *node,
+				  struct flow_cls_offload *f,
+				  u64 rate, u32 burst, u32 mark,
+				  struct npc_install_flow_req *req, bool pps)
+{
+	struct netlink_ext_ack *extack = f->common.extack;
+	struct otx2_hw *hw = &nic->hw;
+	int rq_idx, rc;
+
+	rq_idx = find_first_zero_bit(&nic->rq_bmap, hw->rx_queues);
+	if (rq_idx >= hw->rx_queues) {
+		NL_SET_ERR_MSG_MOD(extack, "Police action rules exceeded");
+		return -EINVAL;
+	}
+
+	req->match_id = mark & 0xFFFFULL;
+	req->index = rq_idx;
+	req->op = NIX_RX_ACTIONOP_UCAST;
+
+	node->is_act_police = true;
+	node->rq = rq_idx;
+	node->burst = burst;
+	node->rate = rate;
+	node->is_pps = pps;
+
+	rc = otx2_tc_act_set_hw_police(nic, node);
+	if (!rc)
+		set_bit(rq_idx, &nic->rq_bmap);
+
+	return rc;
+}
+
 static int otx2_tc_parse_actions(struct otx2_nic *nic,
 				 struct flow_action *flow_action,
 				 struct npc_install_flow_req *req,
@@ -1044,6 +1063,11 @@ static int otx2_tc_del_flow(struct otx2_nic *nic,
 	}
 
 	if (flow_node->is_act_police) {
+		__clear_bit(flow_node->rq, &nic->rq_bmap);
+
+		if (nic->flags & OTX2_FLAG_INTF_DOWN)
+			goto free_mcam_flow;
+
 		mutex_lock(&nic->mbox.lock);
 
 		err = cn10k_map_unmap_rq_policer(nic, flow_node->rq,
@@ -1059,11 +1083,10 @@ static int otx2_tc_del_flow(struct otx2_nic *nic,
 				   "Unable to free leaf bandwidth profile(%d)\n",
 				   flow_node->leaf_profile);
 
-		__clear_bit(flow_node->rq, &nic->rq_bmap);
-
 		mutex_unlock(&nic->mbox.lock);
 	}
 
+free_mcam_flow:
 	otx2_del_mcam_flow_entry(nic, flow_node->entry, NULL);
 	otx2_tc_update_mcam_table(nic, flow_cfg, flow_node, false);
 	kfree_rcu(flow_node, rcu);
@@ -1083,6 +1106,11 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
 	if (!(nic->flags & OTX2_FLAG_TC_FLOWER_SUPPORT))
 		return -ENOMEM;
 
+	if (nic->flags & OTX2_FLAG_INTF_DOWN) {
+		NL_SET_ERR_MSG_MOD(extack, "Interface not initialized");
+		return -EINVAL;
+	}
+
 	if (flow_cfg->nr_flows == flow_cfg->max_flows) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "Free MCAM entry not available to add the flow");
@@ -1442,3 +1470,45 @@ void otx2_shutdown_tc(struct otx2_nic *nic)
 	otx2_destroy_tc_flow_list(nic);
 }
 EXPORT_SYMBOL(otx2_shutdown_tc);
+
+static void otx2_tc_config_ingress_rule(struct otx2_nic *nic,
+					struct otx2_tc_flow *node)
+{
+	struct npc_install_flow_req *req;
+
+	if (otx2_tc_act_set_hw_police(nic, node))
+		return;
+
+	mutex_lock(&nic->mbox.lock);
+
+	req = otx2_mbox_alloc_msg_npc_install_flow(&nic->mbox);
+	if (!req)
+		goto err;
+
+	memcpy(req, &node->req, sizeof(struct npc_install_flow_req));
+
+	if (otx2_sync_mbox_msg(&nic->mbox))
+		netdev_err(nic->netdev,
+			   "Failed to install MCAM flow entry for ingress rule");
+err:
+	mutex_unlock(&nic->mbox.lock);
+}
+
+void otx2_tc_apply_ingress_police_rules(struct otx2_nic *nic)
+{
+	struct otx2_flow_config *flow_cfg = nic->flow_cfg;
+	struct otx2_tc_flow *node;
+
+	/* If any ingress policer rules exist for the interface then
+	 * apply those rules. Ingress policer rules depend on bandwidth
+	 * profiles linked to the receive queues. Since no receive queues
+	 * exist when interface is down, ingress policer rules are stored
+	 * and configured in hardware after all receive queues are allocated
+	 * in otx2_open.
+	 */
+	list_for_each_entry(node, &flow_cfg->flow_list_tc, list) {
+		if (node->is_act_police)
+			otx2_tc_config_ingress_rule(nic, node);
+	}
+}
+EXPORT_SYMBOL(otx2_tc_apply_ingress_police_rules);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index 6ee15f3c25ed..4d519ea833b2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -512,11 +512,18 @@ static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_p
 {
 	struct dim_sample dim_sample;
 	u64 rx_frames, rx_bytes;
+	u64 tx_frames, tx_bytes;
 
 	rx_frames = OTX2_GET_RX_STATS(RX_BCAST) + OTX2_GET_RX_STATS(RX_MCAST) +
 		OTX2_GET_RX_STATS(RX_UCAST);
 	rx_bytes = OTX2_GET_RX_STATS(RX_OCTS);
-	dim_update_sample(pfvf->napi_events, rx_frames, rx_bytes, &dim_sample);
+	tx_bytes = OTX2_GET_TX_STATS(TX_OCTS);
+	tx_frames = OTX2_GET_TX_STATS(TX_UCAST);
+
+	dim_update_sample(pfvf->napi_events,
+			  rx_frames + tx_frames,
+			  rx_bytes + tx_bytes,
+			  &dim_sample);
 	net_dim(&cq_poll->dim, dim_sample);
 }
 
@@ -558,16 +565,9 @@ int otx2_napi_handler(struct napi_struct *napi, int budget)
 		if (pfvf->flags & OTX2_FLAG_INTF_DOWN)
 			return workdone;
 
-		/* Check for adaptive interrupt coalesce */
-		if (workdone != 0 &&
-		    ((pfvf->flags & OTX2_FLAG_ADPTV_INT_COAL_ENABLED) ==
-		     OTX2_FLAG_ADPTV_INT_COAL_ENABLED)) {
-			/* Adjust irq coalese using net_dim */
+		/* Adjust irq coalese using net_dim */
+		if (pfvf->flags & OTX2_FLAG_ADPTV_INT_COAL_ENABLED)
 			otx2_adjust_adaptive_coalese(pfvf, cq_poll);
-			/* Update irq coalescing */
-			for (i = 0; i < pfvf->hw.cint_cnt; i++)
-				otx2_config_irq_coalescing(pfvf, i);
-		}
 
 		if (unlikely(!filled_cnt)) {
 			struct refill_work *work;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index 060a77f2265d..e522845c7c21 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -160,6 +160,18 @@ struct nfp_tun_mac_addr_offload {
 	u8 addr[ETH_ALEN];
 };
 
+/**
+ * struct nfp_neigh_update_work - update neighbour information to nfp
+ * @work:	Work queue for writing neigh to the nfp
+ * @n:		neighbour entry
+ * @app:	Back pointer to app
+ */
+struct nfp_neigh_update_work {
+	struct work_struct work;
+	struct neighbour *n;
+	struct nfp_app *app;
+};
+
 enum nfp_flower_mac_offload_cmd {
 	NFP_TUNNEL_MAC_OFFLOAD_ADD =		0,
 	NFP_TUNNEL_MAC_OFFLOAD_DEL =		1,
@@ -607,38 +619,30 @@ err:
 	nfp_flower_cmsg_warn(app, "Neighbour configuration failed.\n");
 }
 
-static int
-nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event,
-			    void *ptr)
+static void
+nfp_tun_release_neigh_update_work(struct nfp_neigh_update_work *update_work)
 {
-	struct nfp_flower_priv *app_priv;
-	struct netevent_redirect *redir;
-	struct neighbour *n;
+	neigh_release(update_work->n);
+	kfree(update_work);
+}
+
+static void nfp_tun_neigh_update(struct work_struct *work)
+{
+	struct nfp_neigh_update_work *update_work;
 	struct nfp_app *app;
+	struct neighbour *n;
 	bool neigh_invalid;
 	int err;
 
-	switch (event) {
-	case NETEVENT_REDIRECT:
-		redir = (struct netevent_redirect *)ptr;
-		n = redir->neigh;
-		break;
-	case NETEVENT_NEIGH_UPDATE:
-		n = (struct neighbour *)ptr;
-		break;
-	default:
-		return NOTIFY_DONE;
-	}
-
-	neigh_invalid = !(n->nud_state & NUD_VALID) || n->dead;
-
-	app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb);
-	app = app_priv->app;
+	update_work = container_of(work, struct nfp_neigh_update_work, work);
+	app = update_work->app;
+	n = update_work->n;
 
 	if (!nfp_flower_get_port_id_from_netdev(app, n->dev))
-		return NOTIFY_DONE;
+		goto out;
 
 #if IS_ENABLED(CONFIG_INET)
+	neigh_invalid = !(n->nud_state & NUD_VALID) || n->dead;
 	if (n->tbl->family == AF_INET6) {
 #if IS_ENABLED(CONFIG_IPV6)
 		struct flowi6 flow6 = {};
@@ -655,13 +659,11 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event,
 			dst = ip6_dst_lookup_flow(dev_net(n->dev), NULL,
 						  &flow6, NULL);
 			if (IS_ERR(dst))
-				return NOTIFY_DONE;
+				goto out;
 
 			dst_release(dst);
 		}
 		nfp_tun_write_neigh(n->dev, app, &flow6, n, true, false);
-#else
-		return NOTIFY_DONE;
 #endif /* CONFIG_IPV6 */
 	} else {
 		struct flowi4 flow4 = {};
@@ -678,17 +680,71 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event,
 			rt = ip_route_output_key(dev_net(n->dev), &flow4);
 			err = PTR_ERR_OR_ZERO(rt);
 			if (err)
-				return NOTIFY_DONE;
+				goto out;
 
 			ip_rt_put(rt);
 		}
 		nfp_tun_write_neigh(n->dev, app, &flow4, n, false, false);
 	}
-#else
-	return NOTIFY_DONE;
 #endif /* CONFIG_INET */
+out:
+	nfp_tun_release_neigh_update_work(update_work);
+}
 
-	return NOTIFY_OK;
+static struct nfp_neigh_update_work *
+nfp_tun_alloc_neigh_update_work(struct nfp_app *app, struct neighbour *n)
+{
+	struct nfp_neigh_update_work *update_work;
+
+	update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC);
+	if (!update_work)
+		return NULL;
+
+	INIT_WORK(&update_work->work, nfp_tun_neigh_update);
+	neigh_hold(n);
+	update_work->n = n;
+	update_work->app = app;
+
+	return update_work;
+}
+
+static int
+nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event,
+			    void *ptr)
+{
+	struct nfp_neigh_update_work *update_work;
+	struct nfp_flower_priv *app_priv;
+	struct netevent_redirect *redir;
+	struct neighbour *n;
+	struct nfp_app *app;
+
+	switch (event) {
+	case NETEVENT_REDIRECT:
+		redir = (struct netevent_redirect *)ptr;
+		n = redir->neigh;
+		break;
+	case NETEVENT_NEIGH_UPDATE:
+		n = (struct neighbour *)ptr;
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
+#else
+	if (n->tbl != &arp_tbl)
+#endif
+		return NOTIFY_DONE;
+
+	app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb);
+	app = app_priv->app;
+	update_work = nfp_tun_alloc_neigh_update_work(app, n);
+	if (!update_work)
+		return NOTIFY_DONE;
+
+	queue_work(system_highpri_wq, &update_work->work);
+
+	return NOTIFY_DONE;
 }
 
 void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb)
@@ -706,6 +762,7 @@ void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb)
 	netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL);
 	if (!netdev)
 		goto fail_rcu_unlock;
+	dev_hold(netdev);
 
 	flow.daddr = payload->ipv4_addr;
 	flow.flowi4_proto = IPPROTO_UDP;
@@ -725,13 +782,16 @@ void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb)
 	ip_rt_put(rt);
 	if (!n)
 		goto fail_rcu_unlock;
+	rcu_read_unlock();
+
 	nfp_tun_write_neigh(n->dev, app, &flow, n, false, true);
 	neigh_release(n);
-	rcu_read_unlock();
+	dev_put(netdev);
 	return;
 
 fail_rcu_unlock:
 	rcu_read_unlock();
+	dev_put(netdev);
 	nfp_flower_cmsg_warn(app, "Requested route not found.\n");
 }
 
@@ -749,6 +809,7 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb)
 	netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL);
 	if (!netdev)
 		goto fail_rcu_unlock;
+	dev_hold(netdev);
 
 	flow.daddr = payload->ipv6_addr;
 	flow.flowi6_proto = IPPROTO_UDP;
@@ -766,14 +827,16 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb)
 	dst_release(dst);
 	if (!n)
 		goto fail_rcu_unlock;
+	rcu_read_unlock();
 
 	nfp_tun_write_neigh(n->dev, app, &flow, n, true, true);
 	neigh_release(n);
-	rcu_read_unlock();
+	dev_put(netdev);
 	return;
 
 fail_rcu_unlock:
 	rcu_read_unlock();
+	dev_put(netdev);
 	nfp_flower_cmsg_warn(app, "Requested IPv6 route not found.\n");
 }
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 1dbc3cb50b1d..9b5463040075 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -223,7 +223,7 @@ struct ionic_desc_info {
 	void *cb_arg;
 };
 
-#define IONIC_QUEUE_NAME_MAX_SZ		32
+#define IONIC_QUEUE_NAME_MAX_SZ		16
 
 struct ionic_queue {
 	struct device *dev;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index edc14730ce88..bad919343180 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -49,24 +49,24 @@ static void ionic_lif_queue_identify(struct ionic_lif *lif);
 static void ionic_dim_work(struct work_struct *work)
 {
 	struct dim *dim = container_of(work, struct dim, work);
+	struct ionic_intr_info *intr;
 	struct dim_cq_moder cur_moder;
 	struct ionic_qcq *qcq;
+	struct ionic_lif *lif;
 	u32 new_coal;
 
 	cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
 	qcq = container_of(dim, struct ionic_qcq, dim);
-	new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec);
+	lif = qcq->q.lif;
+	new_coal = ionic_coal_usec_to_hw(lif->ionic, cur_moder.usec);
 	new_coal = new_coal ? new_coal : 1;
 
-	if (qcq->intr.dim_coal_hw != new_coal) {
-		unsigned int qi = qcq->cq.bound_q->index;
-		struct ionic_lif *lif = qcq->q.lif;
-
-		qcq->intr.dim_coal_hw = new_coal;
+	intr = &qcq->intr;
+	if (intr->dim_coal_hw != new_coal) {
+		intr->dim_coal_hw = new_coal;
 
 		ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
-				     lif->rxqcqs[qi]->intr.index,
-				     qcq->intr.dim_coal_hw);
+				     intr->index, intr->dim_coal_hw);
 	}
 
 	dim->state = DIM_START_MEASURE;
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 295366a85c63..bb787a52bc75 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -196,6 +196,7 @@ enum rtl_registers {
 					/* No threshold before first PCI xfer */
 #define	RX_FIFO_THRESH			(7 << RXCFG_FIFO_SHIFT)
 #define	RX_EARLY_OFF			(1 << 11)
+#define	RX_PAUSE_SLOT_ON		(1 << 11)	/* 8125b and later */
 #define	RXCFG_DMA_SHIFT			8
 					/* Unlimited maximum PCI burst. */
 #define	RX_DMA_BURST			(7 << RXCFG_DMA_SHIFT)
@@ -579,6 +580,7 @@ struct rtl8169_tc_offsets {
 enum rtl_flag {
 	RTL_FLAG_TASK_ENABLED = 0,
 	RTL_FLAG_TASK_RESET_PENDING,
+	RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE,
 	RTL_FLAG_TASK_TX_TIMEOUT,
 	RTL_FLAG_MAX
 };
@@ -2305,9 +2307,13 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53:
 		RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
 		break;
-	case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_63:
+	case RTL_GIGA_MAC_VER_61:
 		RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST);
 		break;
+	case RTL_GIGA_MAC_VER_63:
+		RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST |
+			RX_PAUSE_SLOT_ON);
+		break;
 	default:
 		RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST);
 		break;
@@ -4582,6 +4588,8 @@ static void rtl_task(struct work_struct *work)
 reset:
 		rtl_reset_work(tp);
 		netif_wake_queue(tp->dev);
+	} else if (test_and_clear_bit(RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, tp->wk.flags)) {
+		rtl_reset_work(tp);
 	}
 out_unlock:
 	rtnl_unlock();
@@ -4615,7 +4623,7 @@ static void r8169_phylink_handler(struct net_device *ndev)
 	} else {
 		/* In few cases rx is broken after link-down otherwise */
 		if (rtl_is_8125(tp))
-			rtl_reset_work(tp);
+			rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE);
 		pm_runtime_idle(d);
 	}
 
@@ -4691,7 +4699,7 @@ static int rtl8169_close(struct net_device *dev)
 	rtl8169_down(tp);
 	rtl8169_rx_clear(tp);
 
-	cancel_work_sync(&tp->wk.work);
+	cancel_work(&tp->wk.work);
 
 	free_irq(tp->irq, tp);
 
@@ -4925,6 +4933,8 @@ static void rtl_remove_one(struct pci_dev *pdev)
 	if (pci_dev_run_wake(pdev))
 		pm_runtime_get_noresume(&pdev->dev);
 
+	cancel_work_sync(&tp->wk.work);
+
 	unregister_netdev(tp->dev);
 
 	if (tp->dash_type != RTL_DASH_NONE)
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index c70cff80cc99..664eda4b5a11 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -515,6 +515,15 @@ static void ravb_emac_init_gbeth(struct net_device *ndev)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
 
+	if (priv->phy_interface == PHY_INTERFACE_MODE_MII) {
+		ravb_write(ndev, (1000 << 16) | CXR35_SEL_XMII_MII, CXR35);
+		ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1, 0);
+	} else {
+		ravb_write(ndev, (1000 << 16) | CXR35_SEL_XMII_RGMII, CXR35);
+		ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1,
+			    CXR31_SEL_LINK0);
+	}
+
 	/* Receive frame limit set register */
 	ravb_write(ndev, GBETH_RX_BUFF_MAX + ETH_FCS_LEN, RFLR);
 
@@ -537,14 +546,6 @@ static void ravb_emac_init_gbeth(struct net_device *ndev)
 
 	/* E-MAC interrupt enable register */
 	ravb_write(ndev, ECSIPR_ICDIP, ECSIPR);
-
-	if (priv->phy_interface == PHY_INTERFACE_MODE_MII) {
-		ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1, 0);
-		ravb_write(ndev, (1000 << 16) | CXR35_SEL_XMII_MII, CXR35);
-	} else {
-		ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1,
-			    CXR31_SEL_LINK0);
-	}
 }
 
 static void ravb_emac_init_rcar(struct net_device *ndev)
@@ -1811,19 +1812,20 @@ static int ravb_open(struct net_device *ndev)
 	if (info->gptp)
 		ravb_ptp_init(ndev, priv->pdev);
 
-	netif_tx_start_all_queues(ndev);
-
 	/* PHY control start */
 	error = ravb_phy_start(ndev);
 	if (error)
 		goto out_ptp_stop;
 
+	netif_tx_start_all_queues(ndev);
+
 	return 0;
 
 out_ptp_stop:
 	/* Stop PTP Clock driver */
 	if (info->gptp)
 		ravb_ptp_stop(ndev);
+	ravb_stop_dma(ndev);
 out_free_irq_mgmta:
 	if (!info->multi_irqs)
 		goto out_free_irq;
@@ -1874,6 +1876,12 @@ static void ravb_tx_timeout_work(struct work_struct *work)
 	struct net_device *ndev = priv->ndev;
 	int error;
 
+	if (!rtnl_trylock()) {
+		usleep_range(1000, 2000);
+		schedule_work(&priv->work);
+		return;
+	}
+
 	netif_tx_stop_all_queues(ndev);
 
 	/* Stop PTP Clock driver */
@@ -1907,7 +1915,7 @@ static void ravb_tx_timeout_work(struct work_struct *work)
 		 */
 		netdev_err(ndev, "%s: ravb_dmac_init() failed, error %d\n",
 			   __func__, error);
-		return;
+		goto out_unlock;
 	}
 	ravb_emac_init(ndev);
 
@@ -1917,6 +1925,9 @@ out:
 		ravb_ptp_init(ndev, priv->pdev);
 
 	netif_tx_start_all_queues(ndev);
+
+out_unlock:
+	rtnl_unlock();
 }
 
 /* Packet transmit function for Ethernet AVB */
@@ -2645,9 +2656,14 @@ static int ravb_probe(struct platform_device *pdev)
 	ndev->features = info->net_features;
 	ndev->hw_features = info->net_hw_features;
 
-	reset_control_deassert(rstc);
+	error = reset_control_deassert(rstc);
+	if (error)
+		goto out_free_netdev;
+
 	pm_runtime_enable(&pdev->dev);
-	pm_runtime_get_sync(&pdev->dev);
+	error = pm_runtime_resume_and_get(&pdev->dev);
+	if (error < 0)
+		goto out_rpm_disable;
 
 	if (info->multi_irqs) {
 		if (info->err_mgmt_irqs)
@@ -2872,11 +2888,12 @@ out_disable_gptp_clk:
 out_disable_refclk:
 	clk_disable_unprepare(priv->refclk);
 out_release:
-	free_netdev(ndev);
-
 	pm_runtime_put(&pdev->dev);
+out_rpm_disable:
 	pm_runtime_disable(&pdev->dev);
 	reset_control_assert(rstc);
+out_free_netdev:
+	free_netdev(ndev);
 	return error;
 }
 
@@ -2886,22 +2903,26 @@ static void ravb_remove(struct platform_device *pdev)
 	struct ravb_private *priv = netdev_priv(ndev);
 	const struct ravb_hw_info *info = priv->info;
 
-	/* Stop PTP Clock driver */
-	if (info->ccc_gac)
-		ravb_ptp_stop(ndev);
-
-	clk_disable_unprepare(priv->gptp_clk);
-	clk_disable_unprepare(priv->refclk);
-
-	/* Set reset mode */
-	ravb_write(ndev, CCC_OPC_RESET, CCC);
 	unregister_netdev(ndev);
 	if (info->nc_queues)
 		netif_napi_del(&priv->napi[RAVB_NC]);
 	netif_napi_del(&priv->napi[RAVB_BE]);
+
 	ravb_mdio_release(priv);
+
+	/* Stop PTP Clock driver */
+	if (info->ccc_gac)
+		ravb_ptp_stop(ndev);
+
 	dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat,
 			  priv->desc_bat_dma);
+
+	/* Set reset mode */
+	ravb_write(ndev, CCC_OPC_RESET, CCC);
+
+	clk_disable_unprepare(priv->gptp_clk);
+	clk_disable_unprepare(priv->refclk);
+
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	reset_control_assert(priv->rstc);
diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index 43a7795d6591..e77c6ff93d81 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -1504,8 +1504,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd
 {
 	struct rswitch_device *rdev = netdev_priv(ndev);
 	struct rswitch_gwca_queue *gq = rdev->tx_queue;
+	netdev_tx_t ret = NETDEV_TX_OK;
 	struct rswitch_ext_desc *desc;
-	int ret = NETDEV_TX_OK;
 	dma_addr_t dma_addr;
 
 	if (rswitch_get_num_cur_queues(gq) >= gq->ring_size - 1) {
@@ -1517,10 +1517,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd
 		return ret;
 
 	dma_addr = dma_map_single(ndev->dev.parent, skb->data, skb->len, DMA_TO_DEVICE);
-	if (dma_mapping_error(ndev->dev.parent, dma_addr)) {
-		dev_kfree_skb_any(skb);
-		return ret;
-	}
+	if (dma_mapping_error(ndev->dev.parent, dma_addr))
+		goto err_kfree;
 
 	gq->skbs[gq->cur] = skb;
 	desc = &gq->tx_ring[gq->cur];
@@ -1533,10 +1531,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd
 		struct rswitch_gwca_ts_info *ts_info;
 
 		ts_info = kzalloc(sizeof(*ts_info), GFP_ATOMIC);
-		if (!ts_info) {
-			dma_unmap_single(ndev->dev.parent, dma_addr, skb->len, DMA_TO_DEVICE);
-			return -ENOMEM;
-		}
+		if (!ts_info)
+			goto err_unmap;
 
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 		rdev->ts_tag++;
@@ -1559,6 +1555,14 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd
 	rswitch_modify(rdev->addr, GWTRC(gq->index), 0, BIT(gq->index % 32));
 
 	return ret;
+
+err_unmap:
+	dma_unmap_single(ndev->dev.parent, dma_addr, skb->len, DMA_TO_DEVICE);
+
+err_kfree:
+	dev_kfree_skb_any(skb);
+
+	return ret;
 }
 
 static struct net_device_stats *rswitch_get_stats(struct net_device *ndev)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index e95d35f1e5a0..8fd167501fa0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -710,28 +710,22 @@ void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev,
 	}
 }
 
-void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
+void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg,
+			  u32 num_txq, u32 num_rxq,
 			  bool enable)
 {
 	u32 value;
 
-	if (!enable) {
-		value = readl(ioaddr + MAC_FPE_CTRL_STS);
-
-		value &= ~EFPE;
-
-		writel(value, ioaddr + MAC_FPE_CTRL_STS);
-		return;
+	if (enable) {
+		cfg->fpe_csr = EFPE;
+		value = readl(ioaddr + GMAC_RXQ_CTRL1);
+		value &= ~GMAC_RXQCTRL_FPRQ;
+		value |= (num_rxq - 1) << GMAC_RXQCTRL_FPRQ_SHIFT;
+		writel(value, ioaddr + GMAC_RXQ_CTRL1);
+	} else {
+		cfg->fpe_csr = 0;
 	}
-
-	value = readl(ioaddr + GMAC_RXQ_CTRL1);
-	value &= ~GMAC_RXQCTRL_FPRQ;
-	value |= (num_rxq - 1) << GMAC_RXQCTRL_FPRQ_SHIFT;
-	writel(value, ioaddr + GMAC_RXQ_CTRL1);
-
-	value = readl(ioaddr + MAC_FPE_CTRL_STS);
-	value |= EFPE;
-	writel(value, ioaddr + MAC_FPE_CTRL_STS);
+	writel(cfg->fpe_csr, ioaddr + MAC_FPE_CTRL_STS);
 }
 
 int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev)
@@ -741,6 +735,9 @@ int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev)
 
 	status = FPE_EVENT_UNKNOWN;
 
+	/* Reads from the MAC_FPE_CTRL_STS register should only be performed
+	 * here, since the status flags of MAC_FPE_CTRL_STS are "clear on read"
+	 */
 	value = readl(ioaddr + MAC_FPE_CTRL_STS);
 
 	if (value & TRSP) {
@@ -766,19 +763,15 @@ int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev)
 	return status;
 }
 
-void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, enum stmmac_mpacket_type type)
+void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg,
+			     enum stmmac_mpacket_type type)
 {
-	u32 value;
+	u32 value = cfg->fpe_csr;
 
-	value = readl(ioaddr + MAC_FPE_CTRL_STS);
-
-	if (type == MPACKET_VERIFY) {
-		value &= ~SRSP;
+	if (type == MPACKET_VERIFY)
 		value |= SVER;
-	} else {
-		value &= ~SVER;
+	else if (type == MPACKET_RESPONSE)
 		value |= SRSP;
-	}
 
 	writel(value, ioaddr + MAC_FPE_CTRL_STS);
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index 53c138d0ff48..34e620790eb3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -153,9 +153,11 @@ int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg,
 			 unsigned int ptp_rate);
 void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev,
 			   struct stmmac_extra_stats *x, u32 txqcnt);
-void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
+void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg,
+			  u32 num_txq, u32 num_rxq,
 			  bool enable);
 void dwmac5_fpe_send_mpacket(void __iomem *ioaddr,
+			     struct stmmac_fpe_cfg *cfg,
 			     enum stmmac_mpacket_type type);
 int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index 453e88b75be0..a74e71db79f9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -1484,7 +1484,8 @@ static int dwxgmac3_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg,
 	return 0;
 }
 
-static void dwxgmac3_fpe_configure(void __iomem *ioaddr, u32 num_txq,
+static void dwxgmac3_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg,
+				   u32 num_txq,
 				   u32 num_rxq, bool enable)
 {
 	u32 value;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index b95d3e137813..68aa2d5ca6e5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -412,9 +412,11 @@ struct stmmac_ops {
 			     unsigned int ptp_rate);
 	void (*est_irq_status)(void __iomem *ioaddr, struct net_device *dev,
 			       struct stmmac_extra_stats *x, u32 txqcnt);
-	void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
+	void (*fpe_configure)(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg,
+			      u32 num_txq, u32 num_rxq,
 			      bool enable);
 	void (*fpe_send_mpacket)(void __iomem *ioaddr,
+				 struct stmmac_fpe_cfg *cfg,
 				 enum stmmac_mpacket_type type);
 	int (*fpe_irq_status)(void __iomem *ioaddr, struct net_device *dev);
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
index ea4910ae0921..6a7c1d325c46 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
@@ -177,8 +177,10 @@
 #define MMC_XGMAC_RX_DISCARD_OCT_GB	0x1b4
 #define MMC_XGMAC_RX_ALIGN_ERR_PKT	0x1bc
 
+#define MMC_XGMAC_TX_FPE_INTR_MASK	0x204
 #define MMC_XGMAC_TX_FPE_FRAG		0x208
 #define MMC_XGMAC_TX_HOLD_REQ		0x20c
+#define MMC_XGMAC_RX_FPE_INTR_MASK	0x224
 #define MMC_XGMAC_RX_PKT_ASSEMBLY_ERR	0x228
 #define MMC_XGMAC_RX_PKT_SMD_ERR	0x22c
 #define MMC_XGMAC_RX_PKT_ASSEMBLY_OK	0x230
@@ -352,6 +354,8 @@ static void dwxgmac_mmc_intr_all_mask(void __iomem *mmcaddr)
 {
 	writel(0x0, mmcaddr + MMC_RX_INTR_MASK);
 	writel(0x0, mmcaddr + MMC_TX_INTR_MASK);
+	writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_TX_FPE_INTR_MASK);
+	writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_RX_FPE_INTR_MASK);
 	writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_RX_IPC_INTR_MASK);
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 2afb2bd25977..37e64283f910 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -964,7 +964,8 @@ static void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up)
 	bool *hs_enable = &fpe_cfg->hs_enable;
 
 	if (is_up && *hs_enable) {
-		stmmac_fpe_send_mpacket(priv, priv->ioaddr, MPACKET_VERIFY);
+		stmmac_fpe_send_mpacket(priv, priv->ioaddr, fpe_cfg,
+					MPACKET_VERIFY);
 	} else {
 		*lo_state = FPE_STATE_OFF;
 		*lp_state = FPE_STATE_OFF;
@@ -5839,6 +5840,7 @@ static void stmmac_fpe_event_status(struct stmmac_priv *priv, int status)
 		/* If user has requested FPE enable, quickly response */
 		if (*hs_enable)
 			stmmac_fpe_send_mpacket(priv, priv->ioaddr,
+						fpe_cfg,
 						MPACKET_RESPONSE);
 	}
 
@@ -7263,6 +7265,7 @@ static void stmmac_fpe_lp_task(struct work_struct *work)
 		if (*lo_state == FPE_STATE_ENTERING_ON &&
 		    *lp_state == FPE_STATE_ENTERING_ON) {
 			stmmac_fpe_configure(priv, priv->ioaddr,
+					     fpe_cfg,
 					     priv->plat->tx_queues_to_use,
 					     priv->plat->rx_queues_to_use,
 					     *enable);
@@ -7281,6 +7284,7 @@ static void stmmac_fpe_lp_task(struct work_struct *work)
 			netdev_info(priv->dev, SEND_VERIFY_MPAKCET_FMT,
 				    *lo_state, *lp_state);
 			stmmac_fpe_send_mpacket(priv, priv->ioaddr,
+						fpe_cfg,
 						MPACKET_VERIFY);
 		}
 		/* Sleep then retry */
@@ -7295,6 +7299,7 @@ void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable)
 	if (priv->plat->fpe_cfg->hs_enable != enable) {
 		if (enable) {
 			stmmac_fpe_send_mpacket(priv, priv->ioaddr,
+						priv->plat->fpe_cfg,
 						MPACKET_VERIFY);
 		} else {
 			priv->plat->fpe_cfg->lo_fpe_state = FPE_STATE_OFF;
@@ -7755,6 +7760,7 @@ int stmmac_suspend(struct device *dev)
 	if (priv->dma_cap.fpesel) {
 		/* Disable FPE */
 		stmmac_fpe_configure(priv, priv->ioaddr,
+				     priv->plat->fpe_cfg,
 				     priv->plat->tx_queues_to_use,
 				     priv->plat->rx_queues_to_use, false);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index ac41ef4cbd2f..6ad3e0a11936 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -1079,6 +1079,7 @@ disable:
 
 	priv->plat->fpe_cfg->enable = false;
 	stmmac_fpe_configure(priv, priv->ioaddr,
+			     priv->plat->fpe_cfg,
 			     priv->plat->tx_queues_to_use,
 			     priv->plat->rx_queues_to_use,
 			     false);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index 2823861e5a92..a5a50b5a8816 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -1972,11 +1972,11 @@ void wx_reset_interrupt_capability(struct wx *wx)
 	if (!pdev->msi_enabled && !pdev->msix_enabled)
 		return;
 
-	pci_free_irq_vectors(wx->pdev);
 	if (pdev->msix_enabled) {
 		kfree(wx->msix_entries);
 		wx->msix_entries = NULL;
 	}
+	pci_free_irq_vectors(wx->pdev);
 }
 EXPORT_SYMBOL(wx_reset_interrupt_capability);
 
diff --git a/drivers/net/hyperv/Kconfig b/drivers/net/hyperv/Kconfig
index ca7bf7f897d3..c8cbd85adcf9 100644
--- a/drivers/net/hyperv/Kconfig
+++ b/drivers/net/hyperv/Kconfig
@@ -3,5 +3,6 @@ config HYPERV_NET
 	tristate "Microsoft Hyper-V virtual network driver"
 	depends on HYPERV
 	select UCS2_STRING
+	select NLS
 	help
 	  Select this option to enable the Hyper-V virtual network driver.
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index f60eb97e3a62..608953d4f98d 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -93,7 +93,7 @@ static void nsim_prog_set_loaded(struct bpf_prog *prog, bool loaded)
 {
 	struct nsim_bpf_bound_prog *state;
 
-	if (!prog || !prog->aux->offload)
+	if (!prog || !bpf_prog_is_offloaded(prog->aux))
 		return;
 
 	state = prog->aux->offload->dev_priv;
@@ -311,7 +311,7 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
 	if (!bpf->prog)
 		return 0;
 
-	if (!bpf->prog->aux->offload) {
+	if (!bpf_prog_is_offloaded(bpf->prog->aux)) {
 		NSIM_EA(bpf->extack, "xdpoffload of non-bound program");
 		return -EINVAL;
 	}
diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index 97bd6705c241..39171380ccf2 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -851,6 +851,12 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
 		return -EACCES;
 	}
 
+	if (data[IFLA_NETKIT_PEER_INFO]) {
+		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO],
+				    "netkit peer info cannot be changed after device creation");
+		return -EINVAL;
+	}
+
 	if (data[IFLA_NETKIT_POLICY]) {
 		attr = data[IFLA_NETKIT_POLICY];
 		policy = nla_get_u32(attr);
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 2c5c1e91ded6..9bf2140fd0a1 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3000,6 +3000,8 @@ static void rtl8152_nic_reset(struct r8152 *tp)
 		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, CR_RST);
 
 		for (i = 0; i < 1000; i++) {
+			if (test_bit(RTL8152_INACCESSIBLE, &tp->flags))
+				break;
 			if (!(ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR) & CR_RST))
 				break;
 			usleep_range(100, 400);
@@ -3329,6 +3331,8 @@ static void rtl_disable(struct r8152 *tp)
 	rxdy_gated_en(tp, true);
 
 	for (i = 0; i < 1000; i++) {
+		if (test_bit(RTL8152_INACCESSIBLE, &tp->flags))
+			break;
 		ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
 		if ((ocp_data & FIFO_EMPTY) == FIFO_EMPTY)
 			break;
@@ -3336,6 +3340,8 @@ static void rtl_disable(struct r8152 *tp)
 	}
 
 	for (i = 0; i < 1000; i++) {
+		if (test_bit(RTL8152_INACCESSIBLE, &tp->flags))
+			break;
 		if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0) & TCR0_TX_EMPTY)
 			break;
 		usleep_range(1000, 2000);
@@ -5499,6 +5505,8 @@ static void wait_oob_link_list_ready(struct r8152 *tp)
 	int i;
 
 	for (i = 0; i < 1000; i++) {
+		if (test_bit(RTL8152_INACCESSIBLE, &tp->flags))
+			break;
 		ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
 		if (ocp_data & LINK_LIST_READY)
 			break;
@@ -5513,6 +5521,8 @@ static void r8156b_wait_loading_flash(struct r8152 *tp)
 		int i;
 
 		for (i = 0; i < 100; i++) {
+			if (test_bit(RTL8152_INACCESSIBLE, &tp->flags))
+				break;
 			if (ocp_read_word(tp, MCU_TYPE_USB, USB_GPHY_CTRL) & GPHY_PATCH_DONE)
 				break;
 			usleep_range(1000, 2000);
@@ -5635,6 +5645,8 @@ static int r8153_pre_firmware_1(struct r8152 *tp)
 	for (i = 0; i < 104; i++) {
 		u32 ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_WDT1_CTRL);
 
+		if (test_bit(RTL8152_INACCESSIBLE, &tp->flags))
+			return -ENODEV;
 		if (!(ocp_data & WTD1_EN))
 			break;
 		usleep_range(1000, 2000);
@@ -5791,6 +5803,8 @@ static void r8153_aldps_en(struct r8152 *tp, bool enable)
 		data &= ~EN_ALDPS;
 		ocp_reg_write(tp, OCP_POWER_CFG, data);
 		for (i = 0; i < 20; i++) {
+			if (test_bit(RTL8152_INACCESSIBLE, &tp->flags))
+				return;
 			usleep_range(1000, 2000);
 			if (ocp_read_word(tp, MCU_TYPE_PLA, 0xe000) & 0x0100)
 				break;
@@ -8397,6 +8411,8 @@ static int rtl8152_pre_reset(struct usb_interface *intf)
 	struct r8152 *tp = usb_get_intfdata(intf);
 	struct net_device *netdev;
 
+	rtnl_lock();
+
 	if (!tp || !test_bit(PROBED_WITH_NO_ERRORS, &tp->flags))
 		return 0;
 
@@ -8428,20 +8444,17 @@ static int rtl8152_post_reset(struct usb_interface *intf)
 	struct sockaddr sa;
 
 	if (!tp || !test_bit(PROBED_WITH_NO_ERRORS, &tp->flags))
-		return 0;
+		goto exit;
 
 	rtl_set_accessible(tp);
 
 	/* reset the MAC address in case of policy change */
-	if (determine_ethernet_addr(tp, &sa) >= 0) {
-		rtnl_lock();
+	if (determine_ethernet_addr(tp, &sa) >= 0)
 		dev_set_mac_address (tp->netdev, &sa, NULL);
-		rtnl_unlock();
-	}
 
 	netdev = tp->netdev;
 	if (!netif_running(netdev))
-		return 0;
+		goto exit;
 
 	set_bit(WORK_ENABLE, &tp->flags);
 	if (netif_carrier_ok(netdev)) {
@@ -8460,6 +8473,8 @@ static int rtl8152_post_reset(struct usb_interface *intf)
 	if (!list_empty(&tp->rx_done))
 		napi_schedule(&tp->napi);
 
+exit:
+	rtnl_unlock();
 	return 0;
 }
 
@@ -10034,6 +10049,7 @@ static const struct usb_device_id rtl8152_table[] = {
 	{ USB_DEVICE(VENDOR_ID_NVIDIA,  0x09ff) },
 	{ USB_DEVICE(VENDOR_ID_TPLINK,  0x0601) },
 	{ USB_DEVICE(VENDOR_ID_DLINK,   0xb301) },
+	{ USB_DEVICE(VENDOR_ID_ASUS,    0x1976) },
 	{}
 };
 
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 57efb3454c57..977861c46b1f 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -790,7 +790,8 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 
 			skb_add_rx_frag(nskb, i, page, page_offset, size,
 					truesize);
-			if (skb_copy_bits(skb, off, page_address(page),
+			if (skb_copy_bits(skb, off,
+					  page_address(page) + page_offset,
 					  size)) {
 				consume_skb(nskb);
 				goto drop;
diff --git a/drivers/net/wireless/ath/ath9k/Kconfig b/drivers/net/wireless/ath/ath9k/Kconfig
index e150d82eddb6..0c47be06c153 100644
--- a/drivers/net/wireless/ath/ath9k/Kconfig
+++ b/drivers/net/wireless/ath/ath9k/Kconfig
@@ -57,8 +57,7 @@ config ATH9K_AHB
 
 config ATH9K_DEBUGFS
 	bool "Atheros ath9k debugging"
-	depends on ATH9K && DEBUG_FS
-	select MAC80211_DEBUGFS
+	depends on ATH9K && DEBUG_FS && MAC80211_DEBUGFS
 	select ATH9K_COMMON_DEBUG
 	help
 	  Say Y, if you need access to ath9k's statistics for
@@ -70,7 +69,6 @@ config ATH9K_DEBUGFS
 config ATH9K_STATION_STATISTICS
 	bool "Detailed station statistics"
 	depends on ATH9K && ATH9K_DEBUGFS && DEBUG_FS
-	select MAC80211_DEBUGFS
 	default n
 	help
 	  This option enables detailed statistics for association stations.
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
index ca5e4fbcf8ce..6af606e5da65 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
@@ -707,8 +707,10 @@ int iwl_mvm_mld_add_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 			rcu_dereference_protected(mvm_sta->link[link_id],
 						  lockdep_is_held(&mvm->mutex));
 
-		if (WARN_ON(!link_conf || !mvm_link_sta))
+		if (WARN_ON(!link_conf || !mvm_link_sta)) {
+			ret = -EINVAL;
 			goto err;
+		}
 
 		ret = iwl_mvm_mld_cfg_sta(mvm, sta, vif, link_sta, link_conf,
 					  mvm_link_sta);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 63f3d4a5c9aa..2cc2d2788f83 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -375,6 +375,7 @@ static int mt7921_load_clc(struct mt792x_dev *dev, const char *fw_name)
 	int ret, i, len, offset = 0;
 	u8 *clc_base = NULL, hw_encap = 0;
 
+	dev->phy.clc_chan_conf = 0xff;
 	if (mt7921_disable_clc ||
 	    mt76_is_usb(&dev->mt76))
 		return 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c
index 15c2fb0bcb1b..aa918b9b0469 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c
@@ -14,7 +14,7 @@
 static void
 mt7925_init_he_caps(struct mt792x_phy *phy, enum nl80211_band band,
 		    struct ieee80211_sband_iftype_data *data,
-			enum nl80211_iftype iftype)
+		    enum nl80211_iftype iftype)
 {
 	struct ieee80211_sta_he_cap *he_cap = &data->he_cap;
 	struct ieee80211_he_cap_elem *he_cap_elem = &he_cap->he_cap_elem;
@@ -53,7 +53,7 @@ mt7925_init_he_caps(struct mt792x_phy *phy, enum nl80211_band band,
 		IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO |
 		IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO;
 
-	switch (i) {
+	switch (iftype) {
 	case NL80211_IFTYPE_AP:
 		he_cap_elem->mac_cap_info[2] |=
 			IEEE80211_HE_MAC_CAP2_BSR;
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 8fe2dd619e80..b309c8be720f 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -107,11 +107,12 @@ config NVME_TCP_TLS
 	  If unsure, say N.
 
 config NVME_HOST_AUTH
-	bool "NVM Express over Fabrics In-Band Authentication"
+	bool "NVMe over Fabrics In-Band Authentication in host side"
 	depends on NVME_CORE
 	select NVME_AUTH
 	help
-	  This provides support for NVMe over Fabrics In-Band Authentication.
+	  This provides support for NVMe over Fabrics In-Band Authentication in
+	  host side.
 
 	  If unsure, say N.
 
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 46a4c9c5ea96..8ebdfd623e0f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -131,7 +131,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
 	/*
 	 * Only new queue scan work when admin and IO queues are both alive
 	 */
-	if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
+	if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE && ctrl->tagset)
 		queue_work(nvme_wq, &ctrl->scan_work);
 }
 
@@ -143,7 +143,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
  */
 int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
 {
-	if (ctrl->state != NVME_CTRL_RESETTING)
+	if (nvme_ctrl_state(ctrl) != NVME_CTRL_RESETTING)
 		return -EBUSY;
 	if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
 		return -EBUSY;
@@ -156,7 +156,7 @@ static void nvme_failfast_work(struct work_struct *work)
 	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
 			struct nvme_ctrl, failfast_work);
 
-	if (ctrl->state != NVME_CTRL_CONNECTING)
+	if (nvme_ctrl_state(ctrl) != NVME_CTRL_CONNECTING)
 		return;
 
 	set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
@@ -200,7 +200,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
 	ret = nvme_reset_ctrl(ctrl);
 	if (!ret) {
 		flush_work(&ctrl->reset_work);
-		if (ctrl->state != NVME_CTRL_LIVE)
+		if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
 			ret = -ENETRESET;
 	}
 
@@ -499,7 +499,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 
 	spin_lock_irqsave(&ctrl->lock, flags);
 
-	old_state = ctrl->state;
+	old_state = nvme_ctrl_state(ctrl);
 	switch (new_state) {
 	case NVME_CTRL_LIVE:
 		switch (old_state) {
@@ -567,7 +567,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 	}
 
 	if (changed) {
-		ctrl->state = new_state;
+		WRITE_ONCE(ctrl->state, new_state);
 		wake_up_all(&ctrl->state_wq);
 	}
 
@@ -575,11 +575,11 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 	if (!changed)
 		return false;
 
-	if (ctrl->state == NVME_CTRL_LIVE) {
+	if (new_state == NVME_CTRL_LIVE) {
 		if (old_state == NVME_CTRL_CONNECTING)
 			nvme_stop_failfast_work(ctrl);
 		nvme_kick_requeue_lists(ctrl);
-	} else if (ctrl->state == NVME_CTRL_CONNECTING &&
+	} else if (new_state == NVME_CTRL_CONNECTING &&
 		old_state == NVME_CTRL_RESETTING) {
 		nvme_start_failfast_work(ctrl);
 	}
@@ -592,7 +592,7 @@ EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
  */
 static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
 {
-	switch (ctrl->state) {
+	switch (nvme_ctrl_state(ctrl)) {
 	case NVME_CTRL_NEW:
 	case NVME_CTRL_LIVE:
 	case NVME_CTRL_RESETTING:
@@ -617,7 +617,7 @@ bool nvme_wait_reset(struct nvme_ctrl *ctrl)
 	wait_event(ctrl->state_wq,
 		   nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
 		   nvme_state_terminal(ctrl));
-	return ctrl->state == NVME_CTRL_RESETTING;
+	return nvme_ctrl_state(ctrl) == NVME_CTRL_RESETTING;
 }
 EXPORT_SYMBOL_GPL(nvme_wait_reset);
 
@@ -704,9 +704,11 @@ EXPORT_SYMBOL_GPL(nvme_init_request);
 blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
 		struct request *rq)
 {
-	if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
-	    ctrl->state != NVME_CTRL_DELETING &&
-	    ctrl->state != NVME_CTRL_DEAD &&
+	enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
+	if (state != NVME_CTRL_DELETING_NOIO &&
+	    state != NVME_CTRL_DELETING &&
+	    state != NVME_CTRL_DEAD &&
 	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
 	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
 		return BLK_STS_RESOURCE;
@@ -736,7 +738,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
 		 * command, which is require to set the queue live in the
 		 * appropinquate states.
 		 */
-		switch (ctrl->state) {
+		switch (nvme_ctrl_state(ctrl)) {
 		case NVME_CTRL_CONNECTING:
 			if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
 			    (req->cmd->fabrics.fctype == nvme_fabrics_type_connect ||
@@ -1192,8 +1194,16 @@ static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl)
 
 static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
 {
-	queue_delayed_work(nvme_wq, &ctrl->ka_work,
-			   nvme_keep_alive_work_period(ctrl));
+	unsigned long now = jiffies;
+	unsigned long delay = nvme_keep_alive_work_period(ctrl);
+	unsigned long ka_next_check_tm = ctrl->ka_last_check_time + delay;
+
+	if (time_after(now, ka_next_check_tm))
+		delay = 0;
+	else
+		delay = ka_next_check_tm - now;
+
+	queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
 }
 
 static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
@@ -1479,7 +1489,8 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
 	if (id->ncap == 0) {
 		/* namespace not allocated or attached */
 		info->is_removed = true;
-		return -ENODEV;
+		ret = -ENODEV;
+		goto error;
 	}
 
 	info->anagrpid = id->anagrpid;
@@ -1497,8 +1508,10 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
 		    !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
 			memcpy(ids->nguid, id->nguid, sizeof(ids->nguid));
 	}
+
+error:
 	kfree(id);
-	return 0;
+	return ret;
 }
 
 static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
@@ -1890,9 +1903,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
 
 	/*
 	 * The block layer can't support LBA sizes larger than the page size
-	 * yet, so catch this early and don't allow block I/O.
+	 * or smaller than a sector size yet, so catch this early and don't
+	 * allow block I/O.
 	 */
-	if (ns->lba_shift > PAGE_SHIFT) {
+	if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) {
 		capacity = 0;
 		bs = (1 << 9);
 	}
@@ -2029,6 +2043,13 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	if (ret)
 		return ret;
 
+	if (id->ncap == 0) {
+		/* namespace not allocated or attached */
+		info->is_removed = true;
+		ret = -ENODEV;
+		goto error;
+	}
+
 	blk_mq_freeze_queue(ns->disk->queue);
 	lbaf = nvme_lbaf_index(id->flbas);
 	ns->lba_shift = id->lbaf[lbaf].ds;
@@ -2090,6 +2111,8 @@ out:
 		set_bit(NVME_NS_READY, &ns->flags);
 		ret = 0;
 	}
+
+error:
 	kfree(id);
 	return ret;
 }
@@ -2529,7 +2552,7 @@ static void nvme_set_latency_tolerance(struct device *dev, s32 val)
 
 	if (ctrl->ps_max_latency_us != latency) {
 		ctrl->ps_max_latency_us = latency;
-		if (ctrl->state == NVME_CTRL_LIVE)
+		if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
 			nvme_configure_apst(ctrl);
 	}
 }
@@ -3217,7 +3240,7 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
 	struct nvme_ctrl *ctrl =
 		container_of(inode->i_cdev, struct nvme_ctrl, cdev);
 
-	switch (ctrl->state) {
+	switch (nvme_ctrl_state(ctrl)) {
 	case NVME_CTRL_LIVE:
 		break;
 	default:
@@ -3639,6 +3662,14 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
 		goto out_unlink_ns;
 
 	down_write(&ctrl->namespaces_rwsem);
+	/*
+	 * Ensure that no namespaces are added to the ctrl list after the queues
+	 * are frozen, thereby avoiding a deadlock between scan and reset.
+	 */
+	if (test_bit(NVME_CTRL_FROZEN, &ctrl->flags)) {
+		up_write(&ctrl->namespaces_rwsem);
+		goto out_unlink_ns;
+	}
 	nvme_ns_add_to_ctrl_list(ns);
 	up_write(&ctrl->namespaces_rwsem);
 	nvme_get_ctrl(ctrl);
@@ -3903,7 +3934,7 @@ static void nvme_scan_work(struct work_struct *work)
 	int ret;
 
 	/* No tagset on a live ctrl means IO queues could not created */
-	if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
+	if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE || !ctrl->tagset)
 		return;
 
 	/*
@@ -3973,7 +4004,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	 * removing the namespaces' disks; fail all the queues now to avoid
 	 * potentially having to clean up the failed sync later.
 	 */
-	if (ctrl->state == NVME_CTRL_DEAD)
+	if (nvme_ctrl_state(ctrl) == NVME_CTRL_DEAD)
 		nvme_mark_namespaces_dead(ctrl);
 
 	/* this is a no-op when called from the controller reset handler */
@@ -4055,7 +4086,7 @@ static void nvme_async_event_work(struct work_struct *work)
 	 * flushing ctrl async_event_work after changing the controller state
 	 * from LIVE and before freeing the admin queue.
 	*/
-	if (ctrl->state == NVME_CTRL_LIVE)
+	if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
 		ctrl->ops->submit_async_event(ctrl);
 }
 
@@ -4450,7 +4481,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 {
 	int ret;
 
-	ctrl->state = NVME_CTRL_NEW;
+	WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
 	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
 	spin_lock_init(&ctrl->lock);
 	mutex_init(&ctrl->scan_lock);
@@ -4471,6 +4502,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
 	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
 	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
+	ctrl->ka_last_check_time = jiffies;
 
 	BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
 			PAGE_SIZE);
@@ -4559,6 +4591,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
 	list_for_each_entry(ns, &ctrl->namespaces, list)
 		blk_mq_unfreeze_queue(ns->queue);
 	up_read(&ctrl->namespaces_rwsem);
+	clear_bit(NVME_CTRL_FROZEN, &ctrl->flags);
 }
 EXPORT_SYMBOL_GPL(nvme_unfreeze);
 
@@ -4592,6 +4625,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
+	set_bit(NVME_CTRL_FROZEN, &ctrl->flags);
 	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
 		blk_freeze_queue_start(ns->queue);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 9f9a3b35dc64..fb22976a36a8 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -557,7 +557,7 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport)
 static void
 nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
 {
-	switch (ctrl->ctrl.state) {
+	switch (nvme_ctrl_state(&ctrl->ctrl)) {
 	case NVME_CTRL_NEW:
 	case NVME_CTRL_CONNECTING:
 		/*
@@ -793,7 +793,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
 		"NVME-FC{%d}: controller connectivity lost. Awaiting "
 		"Reconnect", ctrl->cnum);
 
-	switch (ctrl->ctrl.state) {
+	switch (nvme_ctrl_state(&ctrl->ctrl)) {
 	case NVME_CTRL_NEW:
 	case NVME_CTRL_LIVE:
 		/*
@@ -3319,7 +3319,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
 	unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
 	bool recon = true;
 
-	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
+	if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_CONNECTING)
 		return;
 
 	if (portptr->port_state == FC_OBJSTATE_ONLINE) {
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 529b9954d2b8..4939ed35638f 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -18,15 +18,12 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
 {
 	u32 effects;
 
-	if (capable(CAP_SYS_ADMIN))
-		return true;
-
 	/*
 	 * Do not allow unprivileged passthrough on partitions, as that allows an
 	 * escape from the containment of the partition.
 	 */
 	if (flags & NVME_IOCTL_PARTITION)
-		return false;
+		goto admin;
 
 	/*
 	 * Do not allow unprivileged processes to send vendor specific or fabrics
@@ -34,7 +31,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
 	 */
 	if (c->common.opcode >= nvme_cmd_vendor_start ||
 	    c->common.opcode == nvme_fabrics_command)
-		return false;
+		goto admin;
 
 	/*
 	 * Do not allow unprivileged passthrough of admin commands except
@@ -53,7 +50,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
 				return true;
 			}
 		}
-		return false;
+		goto admin;
 	}
 
 	/*
@@ -63,7 +60,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
 	 */
 	effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
 	if (!(effects & NVME_CMD_EFFECTS_CSUPP))
-		return false;
+		goto admin;
 
 	/*
 	 * Don't allow passthrough for command that have intrusive (or unknown)
@@ -72,16 +69,20 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
 	if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
 			NVME_CMD_EFFECTS_UUID_SEL |
 			NVME_CMD_EFFECTS_SCOPE_MASK))
-		return false;
+		goto admin;
 
 	/*
 	 * Only allow I/O commands that transfer data to the controller or that
 	 * change the logical block contents if the file descriptor is open for
 	 * writing.
 	 */
-	if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC))
-		return open_for_write;
+	if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) &&
+	    !open_for_write)
+		goto admin;
+
 	return true;
+admin:
+	return capable(CAP_SYS_ADMIN);
 }
 
 /*
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 39a90b7cb125..e7411dac00f7 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -156,6 +156,11 @@ enum nvme_quirks {
 	 * No temperature thresholds for channels other than 0 (Composite).
 	 */
 	NVME_QUIRK_NO_SECONDARY_TEMP_THRESH	= (1 << 19),
+
+	/*
+	 * Disables simple suspend/resume path.
+	 */
+	NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND	= (1 << 20),
 };
 
 /*
@@ -251,6 +256,7 @@ enum nvme_ctrl_flags {
 	NVME_CTRL_STOPPED		= 3,
 	NVME_CTRL_SKIP_ID_CNS_CS	= 4,
 	NVME_CTRL_DIRTY_CAPABILITY	= 5,
+	NVME_CTRL_FROZEN		= 6,
 };
 
 struct nvme_ctrl {
@@ -387,6 +393,11 @@ struct nvme_ctrl {
 	enum nvme_dctype dctype;
 };
 
+static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
+{
+	return READ_ONCE(ctrl->state);
+}
+
 enum nvme_iopolicy {
 	NVME_IOPOLICY_NUMA,
 	NVME_IOPOLICY_RR,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 507bc149046d..61af7ff1a9d6 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1233,7 +1233,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
 	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
 
 	/* If there is a reset/reinit ongoing, we shouldn't reset again. */
-	switch (dev->ctrl.state) {
+	switch (nvme_ctrl_state(&dev->ctrl)) {
 	case NVME_CTRL_RESETTING:
 	case NVME_CTRL_CONNECTING:
 		return false;
@@ -1321,7 +1321,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 	 * cancellation error. All outstanding requests are completed on
 	 * shutdown, so we return BLK_EH_DONE.
 	 */
-	switch (dev->ctrl.state) {
+	switch (nvme_ctrl_state(&dev->ctrl)) {
 	case NVME_CTRL_CONNECTING:
 		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 		fallthrough;
@@ -1593,7 +1593,7 @@ static int nvme_setup_io_queues_trylock(struct nvme_dev *dev)
 	/*
 	 * Controller is in wrong state, fail early.
 	 */
-	if (dev->ctrl.state != NVME_CTRL_CONNECTING) {
+	if (nvme_ctrl_state(&dev->ctrl) != NVME_CTRL_CONNECTING) {
 		mutex_unlock(&dev->shutdown_lock);
 		return -ENODEV;
 	}
@@ -2573,13 +2573,13 @@ static bool nvme_pci_ctrl_is_dead(struct nvme_dev *dev)
 
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
+	enum nvme_ctrl_state state = nvme_ctrl_state(&dev->ctrl);
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	bool dead;
 
 	mutex_lock(&dev->shutdown_lock);
 	dead = nvme_pci_ctrl_is_dead(dev);
-	if (dev->ctrl.state == NVME_CTRL_LIVE ||
-	    dev->ctrl.state == NVME_CTRL_RESETTING) {
+	if (state == NVME_CTRL_LIVE || state == NVME_CTRL_RESETTING) {
 		if (pci_is_enabled(pdev))
 			nvme_start_freeze(&dev->ctrl);
 		/*
@@ -2690,7 +2690,7 @@ static void nvme_reset_work(struct work_struct *work)
 	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
 	int result;
 
-	if (dev->ctrl.state != NVME_CTRL_RESETTING) {
+	if (nvme_ctrl_state(&dev->ctrl) != NVME_CTRL_RESETTING) {
 		dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
 			 dev->ctrl.state);
 		result = -ENODEV;
@@ -2902,6 +2902,18 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
 		if ((dmi_match(DMI_BOARD_VENDOR, "LENOVO")) &&
 		     dmi_match(DMI_BOARD_NAME, "LNVNB161216"))
 			return NVME_QUIRK_SIMPLE_SUSPEND;
+	} else if (pdev->vendor == 0x2646 && (pdev->device == 0x2263 ||
+		   pdev->device == 0x500f)) {
+		/*
+		 * Exclude some Kingston NV1 and A2000 devices from
+		 * NVME_QUIRK_SIMPLE_SUSPEND. Do a full suspend to save a
+		 * lot fo energy with s2idle sleep on some TUXEDO platforms.
+		 */
+		if (dmi_match(DMI_BOARD_NAME, "NS5X_NS7XAU") ||
+		    dmi_match(DMI_BOARD_NAME, "NS5x_7xAU") ||
+		    dmi_match(DMI_BOARD_NAME, "NS5x_7xPU") ||
+		    dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1"))
+			return NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND;
 	}
 
 	return 0;
@@ -2932,7 +2944,9 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
 	dev->dev = get_device(&pdev->dev);
 
 	quirks |= check_vendor_combination_bug(pdev);
-	if (!noacpi && acpi_storage_d3(&pdev->dev)) {
+	if (!noacpi &&
+	    !(quirks & NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND) &&
+	    acpi_storage_d3(&pdev->dev)) {
 		/*
 		 * Some systems use a bios work around to ask for D3 on
 		 * platforms that support kernel managed suspend.
@@ -3192,7 +3206,7 @@ static int nvme_suspend(struct device *dev)
 	nvme_wait_freeze(ctrl);
 	nvme_sync_queues(ctrl);
 
-	if (ctrl->state != NVME_CTRL_LIVE)
+	if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
 		goto unfreeze;
 
 	/*
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 6d178d555920..81e2621169e5 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -984,10 +984,11 @@ free_ctrl:
 
 static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
 {
+	enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
 	/* If we are resetting/deleting then do nothing */
-	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
-		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
-			ctrl->ctrl.state == NVME_CTRL_LIVE);
+	if (state != NVME_CTRL_CONNECTING) {
+		WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
 		return;
 	}
 
@@ -1059,8 +1060,10 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
 		 * unless we're during creation of a new controller to
 		 * avoid races with teardown flow.
 		 */
-		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
-			     ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
+		enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
+		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+			     state != NVME_CTRL_DELETING_NOIO);
 		WARN_ON_ONCE(new);
 		ret = -EINVAL;
 		goto destroy_io;
@@ -1129,8 +1132,10 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure is ok if we started ctrl delete */
-		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
-			     ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
+		enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
+		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+			     state != NVME_CTRL_DELETING_NOIO);
 		return;
 	}
 
@@ -1162,7 +1167,7 @@ static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
 	struct nvme_rdma_queue *queue = wc->qp->qp_context;
 	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
 
-	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+	if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE)
 		dev_info(ctrl->ctrl.device,
 			     "%s for CQE 0x%p failed with status %s (%d)\n",
 			     op, wc->wr_cqe,
@@ -1945,7 +1950,7 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
 	dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
 		 rq->tag, nvme_rdma_queue_idx(queue));
 
-	if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
+	if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
 		/*
 		 * If we are resetting, connecting or deleting we should
 		 * complete immediately because we may block controller
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index d79811cfa0ce..08805f027810 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2152,10 +2152,11 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
 
 static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
 {
+	enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
 	/* If we are resetting/deleting then do nothing */
-	if (ctrl->state != NVME_CTRL_CONNECTING) {
-		WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
-			ctrl->state == NVME_CTRL_LIVE);
+	if (state != NVME_CTRL_CONNECTING) {
+		WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
 		return;
 	}
 
@@ -2215,8 +2216,10 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
 		 * unless we're during creation of a new controller to
 		 * avoid races with teardown flow.
 		 */
-		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
-			     ctrl->state != NVME_CTRL_DELETING_NOIO);
+		enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
+		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+			     state != NVME_CTRL_DELETING_NOIO);
 		WARN_ON_ONCE(new);
 		ret = -EINVAL;
 		goto destroy_io;
@@ -2280,8 +2283,10 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure is ok if we started ctrl delete */
-		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
-			     ctrl->state != NVME_CTRL_DELETING_NOIO);
+		enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
+		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+			     state != NVME_CTRL_DELETING_NOIO);
 		return;
 	}
 
@@ -2311,8 +2316,10 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure is ok if we started ctrl delete */
-		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
-			     ctrl->state != NVME_CTRL_DELETING_NOIO);
+		enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
+		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
+			     state != NVME_CTRL_DELETING_NOIO);
 		return;
 	}
 
@@ -2430,7 +2437,7 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
 		nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type,
 		opc, nvme_opcode_str(qid, opc, fctype));
 
-	if (ctrl->state != NVME_CTRL_LIVE) {
+	if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) {
 		/*
 		 * If we are resetting, connecting or deleting we should
 		 * complete immediately because we may block controller
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index e1ebc73f3e5e..872dd1a0acd8 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -99,10 +99,11 @@ config NVME_TARGET_TCP_TLS
 	  If unsure, say N.
 
 config NVME_TARGET_AUTH
-	bool "NVMe over Fabrics In-band Authentication support"
+	bool "NVMe over Fabrics In-band Authentication in target side"
 	depends on NVME_TARGET
 	select NVME_AUTH
 	help
-	  This enables support for NVMe over Fabrics In-band Authentication
+	  This enables support for NVMe over Fabrics In-band Authentication in
+	  target side.
 
 	  If unsure, say N.
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index e307a044b1a1..d937fe05129e 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -18,6 +18,7 @@
 #include <linux/nvme-keyring.h>
 #include <crypto/hash.h>
 #include <crypto/kpp.h>
+#include <linux/nospec.h>
 
 #include "nvmet.h"
 
@@ -621,6 +622,7 @@ static ssize_t nvmet_ns_ana_grpid_store(struct config_item *item,
 
 	down_write(&nvmet_ana_sem);
 	oldgrpid = ns->anagrpid;
+	newgrpid = array_index_nospec(newgrpid, NVMET_MAX_ANAGRPS);
 	nvmet_ana_group_enabled[newgrpid]++;
 	ns->anagrpid = newgrpid;
 	nvmet_ana_group_enabled[oldgrpid]--;
@@ -1812,6 +1814,7 @@ static struct config_group *nvmet_ana_groups_make_group(
 	grp->grpid = grpid;
 
 	down_write(&nvmet_ana_sem);
+	grpid = array_index_nospec(grpid, NVMET_MAX_ANAGRPS);
 	nvmet_ana_group_enabled[grpid]++;
 	up_write(&nvmet_ana_sem);
 
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index bf42b7e826db..608b352a7d91 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -796,6 +796,12 @@ static struct nvmem_layout *nvmem_layout_get(struct nvmem_device *nvmem)
 	if (!layout_np)
 		return NULL;
 
+	/* Fixed layouts don't have a matching driver */
+	if (of_device_is_compatible(layout_np, "fixed-layout")) {
+		of_node_put(layout_np);
+		return NULL;
+	}
+
 	/*
 	 * In case the nvmem device was built-in while the layout was built as a
 	 * module, we shall manually request the layout driver loading otherwise
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index f63250c650ca..3bf27052832f 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -98,8 +98,9 @@ int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p)
  *
  * Returns the new state of a device based on the notifier used.
  *
- * Return: 0 on device going from enabled to disabled, 1 on device
- * going from disabled to enabled and -1 on no change.
+ * Return: OF_RECONFIG_CHANGE_REMOVE on device going from enabled to
+ * disabled, OF_RECONFIG_CHANGE_ADD on device going from disabled to
+ * enabled and OF_RECONFIG_NO_CHANGE on no change.
  */
 int of_reconfig_get_state_change(unsigned long action, struct of_reconfig_data *pr)
 {
diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index 1f236aaf7867..f33b5d1ddfc1 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -2658,6 +2658,8 @@ enum parport_pc_pci_cards {
 	asix_ax99100,
 	quatech_sppxp100,
 	wch_ch382l,
+	brainboxes_uc146,
+	brainboxes_px203,
 };
 
 
@@ -2737,6 +2739,8 @@ static struct parport_pc_pci {
 	/* asix_ax99100 */		{ 1, { { 0, 1 }, } },
 	/* quatech_sppxp100 */		{ 1, { { 0, 1 }, } },
 	/* wch_ch382l */		{ 1, { { 2, -1 }, } },
+	/* brainboxes_uc146 */	{ 1, { { 3, -1 }, } },
+	/* brainboxes_px203 */	{ 1, { { 0, -1 }, } },
 };
 
 static const struct pci_device_id parport_pc_pci_tbl[] = {
@@ -2833,6 +2837,23 @@ static const struct pci_device_id parport_pc_pci_tbl[] = {
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 },
 	/* WCH CH382L PCI-E single parallel port card */
 	{ 0x1c00, 0x3050, 0x1c00, 0x3050, 0, 0, wch_ch382l },
+	/* Brainboxes IX-500/550 */
+	{ PCI_VENDOR_ID_INTASHIELD, 0x402a,
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport },
+	/* Brainboxes UC-146/UC-157 */
+	{ PCI_VENDOR_ID_INTASHIELD, 0x0be1,
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, brainboxes_uc146 },
+	{ PCI_VENDOR_ID_INTASHIELD, 0x0be2,
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, brainboxes_uc146 },
+	/* Brainboxes PX-146/PX-257 */
+	{ PCI_VENDOR_ID_INTASHIELD, 0x401c,
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport },
+	/* Brainboxes PX-203 */
+	{ PCI_VENDOR_ID_INTASHIELD, 0x4007,
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, brainboxes_px203 },
+	/* Brainboxes PX-475 */
+	{ PCI_VENDOR_ID_INTASHIELD, 0x401f,
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport },
 	{ 0, } /* terminate list */
 };
 MODULE_DEVICE_TABLE(pci, parport_pc_pci_tbl);
diff --git a/drivers/pinctrl/cirrus/Kconfig b/drivers/pinctrl/cirrus/Kconfig
index d6318cb57aff..e7e827a8877a 100644
--- a/drivers/pinctrl/cirrus/Kconfig
+++ b/drivers/pinctrl/cirrus/Kconfig
@@ -12,7 +12,8 @@ config PINCTRL_CS42L43
 
 config PINCTRL_LOCHNAGAR
 	tristate "Cirrus Logic Lochnagar pinctrl driver"
-	depends on MFD_LOCHNAGAR
+	# Avoid clash caused by MIPS defining RST, which is used in the driver
+	depends on MFD_LOCHNAGAR && !MIPS
 	select GPIOLIB
 	select PINMUX
 	select PINCONF
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 1fa89be29b8f..f2977eb65522 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -1262,17 +1262,17 @@ static void pinctrl_link_add(struct pinctrl_dev *pctldev,
 static int pinctrl_commit_state(struct pinctrl *p, struct pinctrl_state *state)
 {
 	struct pinctrl_setting *setting, *setting2;
-	struct pinctrl_state *old_state = p->state;
+	struct pinctrl_state *old_state = READ_ONCE(p->state);
 	int ret;
 
-	if (p->state) {
+	if (old_state) {
 		/*
 		 * For each pinmux setting in the old state, forget SW's record
 		 * of mux owner for that pingroup. Any pingroups which are
 		 * still owned by the new state will be re-acquired by the call
 		 * to pinmux_enable_setting() in the loop below.
 		 */
-		list_for_each_entry(setting, &p->state->settings, node) {
+		list_for_each_entry(setting, &old_state->settings, node) {
 			if (setting->type != PIN_MAP_TYPE_MUX_GROUP)
 				continue;
 			pinmux_disable_setting(setting);
diff --git a/drivers/pinctrl/nxp/pinctrl-s32cc.c b/drivers/pinctrl/nxp/pinctrl-s32cc.c
index 7daff9f186cd..f0cad2c501f7 100644
--- a/drivers/pinctrl/nxp/pinctrl-s32cc.c
+++ b/drivers/pinctrl/nxp/pinctrl-s32cc.c
@@ -843,8 +843,8 @@ static int s32_pinctrl_probe_dt(struct platform_device *pdev,
 	if (!np)
 		return -ENODEV;
 
-	if (mem_regions == 0) {
-		dev_err(&pdev->dev, "mem_regions is 0\n");
+	if (mem_regions == 0 || mem_regions >= 10000) {
+		dev_err(&pdev->dev, "mem_regions is invalid: %u\n", mem_regions);
 		return -EINVAL;
 	}
 
diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c
index 04285c930e94..4ccfa99ed93a 100644
--- a/drivers/pinctrl/pinctrl-cy8c95x0.c
+++ b/drivers/pinctrl/pinctrl-cy8c95x0.c
@@ -143,6 +143,7 @@ static const struct dmi_system_id cy8c95x0_dmi_acpi_irq_info[] = {
  * @pinctrl_desc:   pin controller description
  * @name:           Chip controller name
  * @tpin:           Total number of pins
+ * @gpio_reset:     GPIO line handler that can reset the IC
  */
 struct cy8c95x0_pinctrl {
 	struct regmap *regmap;
diff --git a/drivers/pinctrl/realtek/pinctrl-rtd.c b/drivers/pinctrl/realtek/pinctrl-rtd.c
index 9c7a1af4ba69..208896593b61 100644
--- a/drivers/pinctrl/realtek/pinctrl-rtd.c
+++ b/drivers/pinctrl/realtek/pinctrl-rtd.c
@@ -146,7 +146,7 @@ static int rtd_pinctrl_get_function_groups(struct pinctrl_dev *pcdev,
 
 static const struct rtd_pin_desc *rtd_pinctrl_find_mux(struct rtd_pinctrl *data, unsigned int pin)
 {
-	if (!data->info->muxes[pin].name)
+	if (data->info->muxes[pin].name)
 		return &data->info->muxes[pin];
 
 	return NULL;
@@ -249,7 +249,7 @@ static const struct pinctrl_pin_desc
 static const struct rtd_pin_config_desc
 	*rtd_pinctrl_find_config(struct rtd_pinctrl *data, unsigned int pin)
 {
-	if (!data->info->configs[pin].name)
+	if (data->info->configs[pin].name)
 		return &data->info->configs[pin];
 
 	return NULL;
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index 64e8201c7eac..603f900e88c1 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -1273,9 +1273,11 @@ static struct stm32_desc_pin *stm32_pctrl_get_desc_pin_from_gpio(struct stm32_pi
 	int i;
 
 	/* With few exceptions (e.g. bank 'Z'), pin number matches with pin index in array */
-	pin_desc = pctl->pins + stm32_pin_nb;
-	if (pin_desc->pin.number == stm32_pin_nb)
-		return pin_desc;
+	if (stm32_pin_nb < pctl->npins) {
+		pin_desc = pctl->pins + stm32_pin_nb;
+		if (pin_desc->pin.number == stm32_pin_nb)
+			return pin_desc;
+	}
 
 	/* Otherwise, loop all array to find the pin with the right number */
 	for (i = 0; i < pctl->npins; i++) {
@@ -1368,6 +1370,11 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, struct fwnode
 	}
 
 	names = devm_kcalloc(dev, npins, sizeof(char *), GFP_KERNEL);
+	if (!names) {
+		err = -ENOMEM;
+		goto err_clk;
+	}
+
 	for (i = 0; i < npins; i++) {
 		stm32_pin = stm32_pctrl_get_desc_pin_from_gpio(pctl, bank, i);
 		if (stm32_pin && stm32_pin->pin.name)
diff --git a/drivers/platform/mellanox/mlxbf-bootctl.c b/drivers/platform/mellanox/mlxbf-bootctl.c
index 1ac7dab22c63..c1aef3a8fb2d 100644
--- a/drivers/platform/mellanox/mlxbf-bootctl.c
+++ b/drivers/platform/mellanox/mlxbf-bootctl.c
@@ -20,6 +20,7 @@
 
 #define MLXBF_BOOTCTL_SB_SECURE_MASK		0x03
 #define MLXBF_BOOTCTL_SB_TEST_MASK		0x0c
+#define MLXBF_BOOTCTL_SB_DEV_MASK		BIT(4)
 
 #define MLXBF_SB_KEY_NUM			4
 
@@ -40,11 +41,18 @@ static struct mlxbf_bootctl_name boot_names[] = {
 	{ MLXBF_BOOTCTL_NONE, "none" },
 };
 
+enum {
+	MLXBF_BOOTCTL_SB_LIFECYCLE_PRODUCTION = 0,
+	MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE = 1,
+	MLXBF_BOOTCTL_SB_LIFECYCLE_GA_NON_SECURE = 2,
+	MLXBF_BOOTCTL_SB_LIFECYCLE_RMA = 3
+};
+
 static const char * const mlxbf_bootctl_lifecycle_states[] = {
-	[0] = "Production",
-	[1] = "GA Secured",
-	[2] = "GA Non-Secured",
-	[3] = "RMA",
+	[MLXBF_BOOTCTL_SB_LIFECYCLE_PRODUCTION] = "Production",
+	[MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE] = "GA Secured",
+	[MLXBF_BOOTCTL_SB_LIFECYCLE_GA_NON_SECURE] = "GA Non-Secured",
+	[MLXBF_BOOTCTL_SB_LIFECYCLE_RMA] = "RMA",
 };
 
 /* Log header format. */
@@ -247,25 +255,30 @@ static ssize_t second_reset_action_store(struct device *dev,
 static ssize_t lifecycle_state_show(struct device *dev,
 				    struct device_attribute *attr, char *buf)
 {
+	int status_bits;
+	int use_dev_key;
+	int test_state;
 	int lc_state;
 
-	lc_state = mlxbf_bootctl_smc(MLXBF_BOOTCTL_GET_TBB_FUSE_STATUS,
-				     MLXBF_BOOTCTL_FUSE_STATUS_LIFECYCLE);
-	if (lc_state < 0)
-		return lc_state;
+	status_bits = mlxbf_bootctl_smc(MLXBF_BOOTCTL_GET_TBB_FUSE_STATUS,
+					MLXBF_BOOTCTL_FUSE_STATUS_LIFECYCLE);
+	if (status_bits < 0)
+		return status_bits;
 
-	lc_state &=
-		MLXBF_BOOTCTL_SB_TEST_MASK | MLXBF_BOOTCTL_SB_SECURE_MASK;
+	use_dev_key = status_bits & MLXBF_BOOTCTL_SB_DEV_MASK;
+	test_state = status_bits & MLXBF_BOOTCTL_SB_TEST_MASK;
+	lc_state = status_bits & MLXBF_BOOTCTL_SB_SECURE_MASK;
 
 	/*
 	 * If the test bits are set, we specify that the current state may be
 	 * due to using the test bits.
 	 */
-	if (lc_state & MLXBF_BOOTCTL_SB_TEST_MASK) {
-		lc_state &= MLXBF_BOOTCTL_SB_SECURE_MASK;
-
+	if (test_state) {
 		return sprintf(buf, "%s(test)\n",
 			       mlxbf_bootctl_lifecycle_states[lc_state]);
+	} else if (use_dev_key &&
+		   (lc_state == MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE)) {
+		return sprintf(buf, "Secured (development)\n");
 	}
 
 	return sprintf(buf, "%s\n", mlxbf_bootctl_lifecycle_states[lc_state]);
diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c
index 0b427fc24a96..1dd84c7a79de 100644
--- a/drivers/platform/mellanox/mlxbf-pmc.c
+++ b/drivers/platform/mellanox/mlxbf-pmc.c
@@ -1771,6 +1771,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num)
 	attr->dev_attr.show = mlxbf_pmc_event_list_show;
 	attr->nr = blk_num;
 	attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, "event_list");
+	if (!attr->dev_attr.attr.name)
+		return -ENOMEM;
 	pmc->block[blk_num].block_attr[i] = &attr->dev_attr.attr;
 	attr = NULL;
 
@@ -1784,6 +1786,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num)
 		attr->nr = blk_num;
 		attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL,
 							  "enable");
+		if (!attr->dev_attr.attr.name)
+			return -ENOMEM;
 		pmc->block[blk_num].block_attr[++i] = &attr->dev_attr.attr;
 		attr = NULL;
 	}
@@ -1810,6 +1814,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num)
 		attr->nr = blk_num;
 		attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL,
 							  "counter%d", j);
+		if (!attr->dev_attr.attr.name)
+			return -ENOMEM;
 		pmc->block[blk_num].block_attr[++i] = &attr->dev_attr.attr;
 		attr = NULL;
 
@@ -1821,6 +1827,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num)
 		attr->nr = blk_num;
 		attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL,
 							  "event%d", j);
+		if (!attr->dev_attr.attr.name)
+			return -ENOMEM;
 		pmc->block[blk_num].block_attr[++i] = &attr->dev_attr.attr;
 		attr = NULL;
 	}
@@ -1853,6 +1861,8 @@ static int mlxbf_pmc_init_perftype_reg(struct device *dev, int blk_num)
 		attr->nr = blk_num;
 		attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL,
 							  events[j].evt_name);
+		if (!attr->dev_attr.attr.name)
+			return -ENOMEM;
 		pmc->block[blk_num].block_attr[i] = &attr->dev_attr.attr;
 		attr = NULL;
 		i++;
@@ -1882,6 +1892,8 @@ static int mlxbf_pmc_create_groups(struct device *dev, int blk_num)
 	pmc->block[blk_num].block_attr_grp.attrs = pmc->block[blk_num].block_attr;
 	pmc->block[blk_num].block_attr_grp.name = devm_kasprintf(
 		dev, GFP_KERNEL, pmc->block_name[blk_num]);
+	if (!pmc->block[blk_num].block_attr_grp.name)
+		return -ENOMEM;
 	pmc->groups[pmc->group_num] = &pmc->block[blk_num].block_attr_grp;
 	pmc->group_num++;
 
@@ -2063,6 +2075,8 @@ static int mlxbf_pmc_probe(struct platform_device *pdev)
 
 	pmc->hwmon_dev = devm_hwmon_device_register_with_groups(
 		dev, "bfperf", pmc, pmc->groups);
+	if (IS_ERR(pmc->hwmon_dev))
+		return PTR_ERR(pmc->hwmon_dev);
 	platform_set_drvdata(pdev, pmc);
 
 	return 0;
diff --git a/drivers/platform/surface/aggregator/core.c b/drivers/platform/surface/aggregator/core.c
index 1a6373dea109..6152be38398c 100644
--- a/drivers/platform/surface/aggregator/core.c
+++ b/drivers/platform/surface/aggregator/core.c
@@ -231,9 +231,12 @@ static int ssam_receive_buf(struct serdev_device *dev, const unsigned char *buf,
 			    size_t n)
 {
 	struct ssam_controller *ctrl;
+	int ret;
 
 	ctrl = serdev_device_get_drvdata(dev);
-	return ssam_controller_receive_buf(ctrl, buf, n);
+	ret = ssam_controller_receive_buf(ctrl, buf, n);
+
+	return ret < 0 ? 0 : ret;
 }
 
 static void ssam_write_wakeup(struct serdev_device *dev)
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 7e69fdaccdd5..c94f31a5c6a3 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -263,6 +263,7 @@ config ASUS_WMI
 	depends on RFKILL || RFKILL = n
 	depends on HOTPLUG_PCI
 	depends on ACPI_VIDEO || ACPI_VIDEO = n
+	depends on SERIO_I8042 || SERIO_I8042 = n
 	select INPUT_SPARSEKMAP
 	select LEDS_CLASS
 	select NEW_LEDS
@@ -279,7 +280,6 @@ config ASUS_WMI
 config ASUS_NB_WMI
 	tristate "Asus Notebook WMI Driver"
 	depends on ASUS_WMI
-	depends on SERIO_I8042 || SERIO_I8042 = n
 	help
 	  This is a driver for newer Asus notebooks. It adds extra features
 	  like wireless radio and bluetooth control, leds, hotkeys, backlight...
diff --git a/drivers/platform/x86/amd/Kconfig b/drivers/platform/x86/amd/Kconfig
index 55f3a2fc6aec..54753213cc61 100644
--- a/drivers/platform/x86/amd/Kconfig
+++ b/drivers/platform/x86/amd/Kconfig
@@ -18,3 +18,17 @@ config AMD_HSMP
 
 	  If you choose to compile this driver as a module the module will be
 	  called amd_hsmp.
+
+config AMD_WBRF
+	bool "AMD Wifi RF Band mitigations (WBRF)"
+	depends on ACPI
+	help
+	  WBRF(Wifi Band RFI mitigation) mechanism allows Wifi drivers
+	  to notify the frequencies they are using so that other hardware
+	  can be reconfigured to avoid harmonic conflicts.
+
+	  AMD provides an ACPI based mechanism to support WBRF on platform with
+	  appropriate underlying support.
+
+	  This mechanism will only be activated on platforms that advertise a
+	  need for it.
diff --git a/drivers/platform/x86/amd/Makefile b/drivers/platform/x86/amd/Makefile
index f04932b7a7d1..dcec0a46f8af 100644
--- a/drivers/platform/x86/amd/Makefile
+++ b/drivers/platform/x86/amd/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_AMD_PMC)		+= pmc/
 amd_hsmp-y			:= hsmp.o
 obj-$(CONFIG_AMD_HSMP)		+= amd_hsmp.o
 obj-$(CONFIG_AMD_PMF)		+= pmf/
+obj-$(CONFIG_AMD_WBRF)		+= wbrf.o
diff --git a/drivers/platform/x86/amd/wbrf.c b/drivers/platform/x86/amd/wbrf.c
new file mode 100644
index 000000000000..dd197b3aebe0
--- /dev/null
+++ b/drivers/platform/x86/amd/wbrf.c
@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wifi Frequency Band Manage Interface
+ * Copyright (C) 2023 Advanced Micro Devices
+ */
+
+#include <linux/acpi.h>
+#include <linux/acpi_amd_wbrf.h>
+
+/*
+ * Functions bit vector for WBRF method
+ *
+ * Bit 0: WBRF supported.
+ * Bit 1: Function 1 (Add / Remove frequency) is supported.
+ * Bit 2: Function 2 (Get frequency list) is supported.
+ */
+#define WBRF_ENABLED		0x0
+#define WBRF_RECORD			0x1
+#define WBRF_RETRIEVE		0x2
+
+#define WBRF_REVISION		0x1
+
+/*
+ * The data structure used for WBRF_RETRIEVE is not naturally aligned.
+ * And unfortunately the design has been settled down.
+ */
+struct amd_wbrf_ranges_out {
+	u32			num_of_ranges;
+	struct freq_band_range	band_list[MAX_NUM_OF_WBRF_RANGES];
+} __packed;
+
+static const guid_t wifi_acpi_dsm_guid =
+	GUID_INIT(0x7b7656cf, 0xdc3d, 0x4c1c,
+		  0x83, 0xe9, 0x66, 0xe7, 0x21, 0xde, 0x30, 0x70);
+
+/*
+ * Used to notify consumer (amdgpu driver currently) about
+ * the wifi frequency is change.
+ */
+static BLOCKING_NOTIFIER_HEAD(wbrf_chain_head);
+
+static int wbrf_record(struct acpi_device *adev, uint8_t action, struct wbrf_ranges_in_out *in)
+{
+	union acpi_object argv4;
+	union acpi_object *tmp;
+	union acpi_object *obj;
+	u32 num_of_ranges = 0;
+	u32 num_of_elements;
+	u32 arg_idx = 0;
+	int ret;
+	u32 i;
+
+	if (!in)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(in->band_list); i++) {
+		if (in->band_list[i].start && in->band_list[i].end)
+			num_of_ranges++;
+	}
+
+	/*
+	 * The num_of_ranges value in the "in" object supplied by
+	 * the caller is required to be equal to the number of
+	 * entries in the band_list array in there.
+	 */
+	if (num_of_ranges != in->num_of_ranges)
+		return -EINVAL;
+
+	/*
+	 * Every input frequency band comes with two end points(start/end)
+	 * and each is accounted as an element. Meanwhile the range count
+	 * and action type are accounted as an element each.
+	 * So, the total element count = 2 * num_of_ranges + 1 + 1.
+	 */
+	num_of_elements = 2 * num_of_ranges + 2;
+
+	tmp = kcalloc(num_of_elements, sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	argv4.package.type = ACPI_TYPE_PACKAGE;
+	argv4.package.count = num_of_elements;
+	argv4.package.elements = tmp;
+
+	/* save the number of ranges*/
+	tmp[0].integer.type = ACPI_TYPE_INTEGER;
+	tmp[0].integer.value = num_of_ranges;
+
+	/* save the action(WBRF_RECORD_ADD/REMOVE/RETRIEVE) */
+	tmp[1].integer.type = ACPI_TYPE_INTEGER;
+	tmp[1].integer.value = action;
+
+	arg_idx = 2;
+	for (i = 0; i < ARRAY_SIZE(in->band_list); i++) {
+		if (!in->band_list[i].start || !in->band_list[i].end)
+			continue;
+
+		tmp[arg_idx].integer.type = ACPI_TYPE_INTEGER;
+		tmp[arg_idx++].integer.value = in->band_list[i].start;
+		tmp[arg_idx].integer.type = ACPI_TYPE_INTEGER;
+		tmp[arg_idx++].integer.value = in->band_list[i].end;
+	}
+
+	obj = acpi_evaluate_dsm(adev->handle, &wifi_acpi_dsm_guid,
+				WBRF_REVISION, WBRF_RECORD, &argv4);
+
+	if (!obj)
+		return -EINVAL;
+
+	if (obj->type != ACPI_TYPE_INTEGER) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = obj->integer.value;
+	if (ret)
+		ret = -EINVAL;
+
+out:
+	ACPI_FREE(obj);
+	kfree(tmp);
+
+	return ret;
+}
+
+/**
+ * acpi_amd_wbrf_add_remove - add or remove the frequency band the device is using
+ *
+ * @dev: device pointer
+ * @action: remove or add the frequency band into bios
+ * @in: input structure containing the frequency band the device is using
+ *
+ * Broadcast to other consumers the frequency band the device starts
+ * to use. Underneath the surface the information is cached into an
+ * internal buffer first. Then a notification is sent to all those
+ * registered consumers. So then they can retrieve that buffer to
+ * know the latest active frequency bands. Consumers that haven't
+ * yet been registered can retrieve the information from the cache
+ * when they register.
+ *
+ * Return:
+ * 0 for success add/remove wifi frequency band.
+ * Returns a negative error code for failure.
+ */
+int acpi_amd_wbrf_add_remove(struct device *dev, uint8_t action, struct wbrf_ranges_in_out *in)
+{
+	struct acpi_device *adev;
+	int ret;
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return -ENODEV;
+
+	ret = wbrf_record(adev, action, in);
+	if (ret)
+		return ret;
+
+	blocking_notifier_call_chain(&wbrf_chain_head, WBRF_CHANGED, NULL);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(acpi_amd_wbrf_add_remove);
+
+/**
+ * acpi_amd_wbrf_supported_producer - determine if the WBRF can be enabled
+ *                                    for the device as a producer
+ *
+ * @dev: device pointer
+ *
+ * Check if the platform equipped with necessary implementations to
+ * support WBRF for the device as a producer.
+ *
+ * Return:
+ * true if WBRF is supported, otherwise returns false
+ */
+bool acpi_amd_wbrf_supported_producer(struct device *dev)
+{
+	struct acpi_device *adev;
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return false;
+
+	return acpi_check_dsm(adev->handle, &wifi_acpi_dsm_guid,
+			      WBRF_REVISION, BIT(WBRF_RECORD));
+}
+EXPORT_SYMBOL_GPL(acpi_amd_wbrf_supported_producer);
+
+/**
+ * acpi_amd_wbrf_supported_consumer - determine if the WBRF can be enabled
+ *                                    for the device as a consumer
+ *
+ * @dev: device pointer
+ *
+ * Determine if the platform equipped with necessary implementations to
+ * support WBRF for the device as a consumer.
+ *
+ * Return:
+ * true if WBRF is supported, otherwise returns false.
+ */
+bool acpi_amd_wbrf_supported_consumer(struct device *dev)
+{
+	struct acpi_device *adev;
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return false;
+
+	return acpi_check_dsm(adev->handle, &wifi_acpi_dsm_guid,
+			      WBRF_REVISION, BIT(WBRF_RETRIEVE));
+}
+EXPORT_SYMBOL_GPL(acpi_amd_wbrf_supported_consumer);
+
+/**
+ * amd_wbrf_retrieve_freq_band - retrieve current active frequency bands
+ *
+ * @dev: device pointer
+ * @out: output structure containing all the active frequency bands
+ *
+ * Retrieve the current active frequency bands which were broadcasted
+ * by other producers. The consumer who calls this API should take
+ * proper actions if any of the frequency band may cause RFI with its
+ * own frequency band used.
+ *
+ * Return:
+ * 0 for getting wifi freq band successfully.
+ * Returns a negative error code for failure.
+ */
+int amd_wbrf_retrieve_freq_band(struct device *dev, struct wbrf_ranges_in_out *out)
+{
+	struct amd_wbrf_ranges_out acpi_out = {0};
+	struct acpi_device *adev;
+	union acpi_object *obj;
+	union acpi_object param;
+	int ret = 0;
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return -ENODEV;
+
+	param.type = ACPI_TYPE_STRING;
+	param.string.length = 0;
+	param.string.pointer = NULL;
+
+	obj = acpi_evaluate_dsm(adev->handle, &wifi_acpi_dsm_guid,
+							WBRF_REVISION, WBRF_RETRIEVE, &param);
+	if (!obj)
+		return -EINVAL;
+
+	/*
+	 * The return buffer is with variable length and the format below:
+	 * number_of_entries(1 DWORD):       Number of entries
+	 * start_freq of 1st entry(1 QWORD): Start frequency of the 1st entry
+	 * end_freq of 1st entry(1 QWORD):   End frequency of the 1st entry
+	 * ...
+	 * ...
+	 * start_freq of the last entry(1 QWORD)
+	 * end_freq of the last entry(1 QWORD)
+	 *
+	 * Thus the buffer length is determined by the number of entries.
+	 * - For zero entry scenario, the buffer length will be 4 bytes.
+	 * - For one entry scenario, the buffer length will be 20 bytes.
+	 */
+	if (obj->buffer.length > sizeof(acpi_out) || obj->buffer.length < 4) {
+		dev_err(dev, "Wrong sized WBRT information");
+		ret = -EINVAL;
+		goto out;
+	}
+	memcpy(&acpi_out, obj->buffer.pointer, obj->buffer.length);
+
+	out->num_of_ranges = acpi_out.num_of_ranges;
+	memcpy(out->band_list, acpi_out.band_list, sizeof(acpi_out.band_list));
+
+out:
+	ACPI_FREE(obj);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(amd_wbrf_retrieve_freq_band);
+
+/**
+ * amd_wbrf_register_notifier - register for notifications of frequency
+ *                                   band update
+ *
+ * @nb: driver notifier block
+ *
+ * The consumer should register itself via this API so that it can get
+ * notified on the frequency band updates from other producers.
+ *
+ * Return:
+ * 0 for registering a consumer driver successfully.
+ * Returns a negative error code for failure.
+ */
+int amd_wbrf_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&wbrf_chain_head, nb);
+}
+EXPORT_SYMBOL_GPL(amd_wbrf_register_notifier);
+
+/**
+ * amd_wbrf_unregister_notifier - unregister for notifications of
+ *                                     frequency band update
+ *
+ * @nb: driver notifier block
+ *
+ * The consumer should call this API when it is longer interested with
+ * the frequency band updates from other producers. Usually, this should
+ * be performed during driver cleanup.
+ *
+ * Return:
+ * 0 for unregistering a consumer driver.
+ * Returns a negative error code for failure.
+ */
+int amd_wbrf_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&wbrf_chain_head, nb);
+}
+EXPORT_SYMBOL_GPL(amd_wbrf_unregister_notifier);
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 9aa1226e74e6..fceffe2082ec 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -48,25 +48,43 @@ module_param(tablet_mode_sw, uint, 0444);
 MODULE_PARM_DESC(tablet_mode_sw, "Tablet mode detect: -1:auto 0:disable 1:kbd-dock 2:lid-flip 3:lid-flip-rog");
 
 static struct quirk_entry *quirks;
+static bool atkbd_reports_vol_keys;
 
-static bool asus_q500a_i8042_filter(unsigned char data, unsigned char str,
-			      struct serio *port)
+static bool asus_i8042_filter(unsigned char data, unsigned char str, struct serio *port)
 {
-	static bool extended;
-	bool ret = false;
+	static bool extended_e0;
+	static bool extended_e1;
 
 	if (str & I8042_STR_AUXDATA)
 		return false;
 
-	if (unlikely(data == 0xe1)) {
-		extended = true;
-		ret = true;
-	} else if (unlikely(extended)) {
-		extended = false;
-		ret = true;
+	if (quirks->filter_i8042_e1_extended_codes) {
+		if (data == 0xe1) {
+			extended_e1 = true;
+			return true;
+		}
+
+		if (extended_e1) {
+			extended_e1 = false;
+			return true;
+		}
 	}
 
-	return ret;
+	if (data == 0xe0) {
+		extended_e0 = true;
+	} else if (extended_e0) {
+		extended_e0 = false;
+
+		switch (data & 0x7f) {
+		case 0x20: /* e0 20 / e0 a0, Volume Mute press / release */
+		case 0x2e: /* e0 2e / e0 ae, Volume Down press / release */
+		case 0x30: /* e0 30 / e0 b0, Volume Up press / release */
+			atkbd_reports_vol_keys = true;
+			break;
+		}
+	}
+
+	return false;
 }
 
 static struct quirk_entry quirk_asus_unknown = {
@@ -75,7 +93,7 @@ static struct quirk_entry quirk_asus_unknown = {
 };
 
 static struct quirk_entry quirk_asus_q500a = {
-	.i8042_filter = asus_q500a_i8042_filter,
+	.filter_i8042_e1_extended_codes = true,
 	.wmi_backlight_set_devstate = true,
 };
 
@@ -503,8 +521,6 @@ static const struct dmi_system_id asus_quirks[] = {
 
 static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver)
 {
-	int ret;
-
 	quirks = &quirk_asus_unknown;
 	dmi_check_system(asus_quirks);
 
@@ -519,15 +535,6 @@ static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver)
 
 	if (tablet_mode_sw != -1)
 		quirks->tablet_switch_mode = tablet_mode_sw;
-
-	if (quirks->i8042_filter) {
-		ret = i8042_install_filter(quirks->i8042_filter);
-		if (ret) {
-			pr_warn("Unable to install key filter\n");
-			return;
-		}
-		pr_info("Using i8042 filter function for receiving events\n");
-	}
 }
 
 static const struct key_entry asus_nb_wmi_keymap[] = {
@@ -618,6 +625,13 @@ static void asus_nb_wmi_key_filter(struct asus_wmi_driver *asus_wmi, int *code,
 			*code = ASUS_WMI_KEY_IGNORE;
 
 		break;
+	case 0x30: /* Volume Up */
+	case 0x31: /* Volume Down */
+	case 0x32: /* Volume Mute */
+		if (atkbd_reports_vol_keys)
+			*code = ASUS_WMI_KEY_IGNORE;
+
+		break;
 	}
 }
 
@@ -630,6 +644,7 @@ static struct asus_wmi_driver asus_nb_wmi_driver = {
 	.input_phys = ASUS_NB_WMI_FILE "/input0",
 	.detect_quirks = asus_nb_wmi_quirks,
 	.key_filter = asus_nb_wmi_key_filter,
+	.i8042_filter = asus_i8042_filter,
 };
 
 
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 6a79f16233ab..9f7e23c5c6b4 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -16,6 +16,7 @@
 #include <linux/acpi.h>
 #include <linux/backlight.h>
 #include <linux/debugfs.h>
+#include <linux/delay.h>
 #include <linux/dmi.h>
 #include <linux/fb.h>
 #include <linux/hwmon.h>
@@ -132,6 +133,11 @@ module_param(fnlock_default, bool, 0444);
 #define ASUS_SCREENPAD_BRIGHT_MAX 255
 #define ASUS_SCREENPAD_BRIGHT_DEFAULT 60
 
+/* Controls the power state of the USB0 hub on ROG Ally which input is on */
+#define ASUS_USB0_PWR_EC0_CSEE "\\_SB.PCI0.SBRG.EC0.CSEE"
+/* 300ms so far seems to produce a reliable result on AC and battery */
+#define ASUS_USB0_PWR_EC0_CSEE_WAIT 300
+
 static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL };
 
 static int throttle_thermal_policy_write(struct asus_wmi *);
@@ -300,6 +306,9 @@ struct asus_wmi {
 
 	bool fnlock_locked;
 
+	/* The ROG Ally device requires the MCU USB device be disconnected before suspend */
+	bool ally_mcu_usb_switch;
+
 	struct asus_wmi_debug debug;
 
 	struct asus_wmi_driver *driver;
@@ -4488,6 +4497,8 @@ static int asus_wmi_add(struct platform_device *pdev)
 	asus->nv_temp_tgt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_THERM_TARGET);
 	asus->panel_overdrive_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PANEL_OD);
 	asus->mini_led_mode_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE);
+	asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE)
+						&& dmi_match(DMI_BOARD_NAME, "RC71L");
 
 	err = fan_boost_mode_check_present(asus);
 	if (err)
@@ -4567,6 +4578,12 @@ static int asus_wmi_add(struct platform_device *pdev)
 		goto fail_wmi_handler;
 	}
 
+	if (asus->driver->i8042_filter) {
+		err = i8042_install_filter(asus->driver->i8042_filter);
+		if (err)
+			pr_warn("Unable to install key filter - %d\n", err);
+	}
+
 	asus_wmi_battery_init(asus);
 
 	asus_wmi_debugfs_init(asus);
@@ -4603,6 +4620,8 @@ static int asus_wmi_remove(struct platform_device *device)
 	struct asus_wmi *asus;
 
 	asus = platform_get_drvdata(device);
+	if (asus->driver->i8042_filter)
+		i8042_remove_filter(asus->driver->i8042_filter);
 	wmi_remove_notify_handler(asus->driver->event_guid);
 	asus_wmi_backlight_exit(asus);
 	asus_screenpad_exit(asus);
@@ -4654,6 +4673,43 @@ static int asus_hotk_resume(struct device *device)
 		asus_wmi_fnlock_update(asus);
 
 	asus_wmi_tablet_mode_get_state(asus);
+
+	return 0;
+}
+
+static int asus_hotk_resume_early(struct device *device)
+{
+	struct asus_wmi *asus = dev_get_drvdata(device);
+
+	if (asus->ally_mcu_usb_switch) {
+		if (ACPI_FAILURE(acpi_execute_simple_method(NULL, ASUS_USB0_PWR_EC0_CSEE, 0xB8)))
+			dev_err(device, "ROG Ally MCU failed to connect USB dev\n");
+		else
+			msleep(ASUS_USB0_PWR_EC0_CSEE_WAIT);
+	}
+	return 0;
+}
+
+static int asus_hotk_prepare(struct device *device)
+{
+	struct asus_wmi *asus = dev_get_drvdata(device);
+	int result, err;
+
+	if (asus->ally_mcu_usb_switch) {
+		/* When powersave is enabled it causes many issues with resume of USB hub */
+		result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_MCU_POWERSAVE);
+		if (result == 1) {
+			dev_warn(device, "MCU powersave enabled, disabling to prevent resume issues");
+			err = asus_wmi_set_devstate(ASUS_WMI_DEVID_MCU_POWERSAVE, 0, &result);
+			if (err || result != 1)
+				dev_err(device, "Failed to set MCU powersave mode: %d\n", err);
+		}
+		/* sleep required to ensure USB0 is disabled before sleep continues */
+		if (ACPI_FAILURE(acpi_execute_simple_method(NULL, ASUS_USB0_PWR_EC0_CSEE, 0xB7)))
+			dev_err(device, "ROG Ally MCU failed to disconnect USB dev\n");
+		else
+			msleep(ASUS_USB0_PWR_EC0_CSEE_WAIT);
+	}
 	return 0;
 }
 
@@ -4701,6 +4757,8 @@ static const struct dev_pm_ops asus_pm_ops = {
 	.thaw = asus_hotk_thaw,
 	.restore = asus_hotk_restore,
 	.resume = asus_hotk_resume,
+	.resume_early = asus_hotk_resume_early,
+	.prepare = asus_hotk_prepare,
 };
 
 /* Registration ***************************************************************/
diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h
index adb67c925724..cc30f1853847 100644
--- a/drivers/platform/x86/asus-wmi.h
+++ b/drivers/platform/x86/asus-wmi.h
@@ -39,6 +39,7 @@ struct quirk_entry {
 	bool wmi_backlight_set_devstate;
 	bool wmi_force_als_set;
 	bool wmi_ignore_fan;
+	bool filter_i8042_e1_extended_codes;
 	enum asus_wmi_tablet_switch_mode tablet_switch_mode;
 	int wapf;
 	/*
@@ -49,9 +50,6 @@ struct quirk_entry {
 	 */
 	int no_display_toggle;
 	u32 xusb2pr;
-
-	bool (*i8042_filter)(unsigned char data, unsigned char str,
-			     struct serio *serio);
 };
 
 struct asus_wmi_driver {
@@ -73,6 +71,9 @@ struct asus_wmi_driver {
 	 * Return ASUS_WMI_KEY_IGNORE in code if event should be ignored. */
 	void (*key_filter) (struct asus_wmi_driver *driver, int *code,
 			    unsigned int *value, bool *autorelease);
+	/* Optional standard i8042 filter */
+	bool (*i8042_filter)(unsigned char data, unsigned char str,
+			     struct serio *serio);
 
 	int (*probe) (struct platform_device *device);
 	void (*detect_quirks) (struct asus_wmi_driver *driver);
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 5c27b4aa9690..5dd22258cb3b 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -1340,6 +1340,11 @@ static int parse_wdg(struct device *wmi_bus_dev, struct platform_device *pdev)
 		if (debug_dump_wdg)
 			wmi_dump_wdg(&gblock[i]);
 
+		if (!gblock[i].instance_count) {
+			dev_info(wmi_bus_dev, FW_INFO "%pUL has zero instances\n", &gblock[i].guid);
+			continue;
+		}
+
 		if (guid_already_parsed_for_legacy(device, &gblock[i].guid))
 			continue;
 
diff --git a/drivers/pmdomain/arm/scmi_perf_domain.c b/drivers/pmdomain/arm/scmi_perf_domain.c
index bc3f78abb6da..709bbc448fad 100644
--- a/drivers/pmdomain/arm/scmi_perf_domain.c
+++ b/drivers/pmdomain/arm/scmi_perf_domain.c
@@ -35,7 +35,7 @@ scmi_pd_set_perf_state(struct generic_pm_domain *genpd, unsigned int state)
 	if (!state)
 		return -EINVAL;
 
-	ret = pd->perf_ops->level_set(pd->ph, pd->domain_id, state, true);
+	ret = pd->perf_ops->level_set(pd->ph, pd->domain_id, state, false);
 	if (ret)
 		dev_warn(&genpd->dev, "Failed with %d when trying to set %d perf level",
 			 ret, state);
diff --git a/drivers/pmdomain/qcom/rpmpd.c b/drivers/pmdomain/qcom/rpmpd.c
index 07590a3ef19c..7796d65f96e8 100644
--- a/drivers/pmdomain/qcom/rpmpd.c
+++ b/drivers/pmdomain/qcom/rpmpd.c
@@ -1044,6 +1044,7 @@ static int rpmpd_probe(struct platform_device *pdev)
 		rpmpds[i]->pd.power_off = rpmpd_power_off;
 		rpmpds[i]->pd.power_on = rpmpd_power_on;
 		rpmpds[i]->pd.set_performance_state = rpmpd_set_performance;
+		rpmpds[i]->pd.flags = GENPD_FLAG_ACTIVE_WAKEUP;
 		pm_genpd_init(&rpmpds[i]->pd, NULL, true);
 
 		data->domains[i] = &rpmpds[i]->pd;
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index 2ff7717530bf..9193c3b8edeb 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -24,7 +24,6 @@
 #include <linux/of.h>
 #include <linux/pm_qos.h>
 #include <linux/slab.h>
-#include <linux/units.h>
 
 struct dtpm_cpu {
 	struct dtpm dtpm;
@@ -104,8 +103,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
 		if (pd->table[i].frequency < freq)
 			continue;
 
-		return scale_pd_power_uw(pd_mask, pd->table[i].power *
-					 MICROWATT_PER_MILLIWATT);
+		return scale_pd_power_uw(pd_mask, pd->table[i].power);
 	}
 
 	return 0;
@@ -122,11 +120,9 @@ static int update_pd_power_uw(struct dtpm *dtpm)
 	nr_cpus = cpumask_weight(&cpus);
 
 	dtpm->power_min = em->table[0].power;
-	dtpm->power_min *= MICROWATT_PER_MILLIWATT;
 	dtpm->power_min *= nr_cpus;
 
 	dtpm->power_max = em->table[em->nr_perf_states - 1].power;
-	dtpm->power_max *= MICROWATT_PER_MILLIWATT;
 	dtpm->power_max *= nr_cpus;
 
 	return 0;
@@ -144,6 +140,8 @@ static void pd_release(struct dtpm *dtpm)
 	if (policy) {
 		for_each_cpu(dtpm_cpu->cpu, policy->related_cpus)
 			per_cpu(dtpm_per_cpu, dtpm_cpu->cpu) = NULL;
+
+		cpufreq_cpu_put(policy);
 	}
 	
 	kfree(dtpm_cpu);
@@ -195,12 +193,16 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
 		return 0;
 
 	pd = em_cpu_get(cpu);
-	if (!pd || em_is_artificial(pd))
-		return -EINVAL;
+	if (!pd || em_is_artificial(pd)) {
+		ret = -EINVAL;
+		goto release_policy;
+	}
 
 	dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
-	if (!dtpm_cpu)
-		return -ENOMEM;
+	if (!dtpm_cpu) {
+		ret = -ENOMEM;
+		goto release_policy;
+	}
 
 	dtpm_init(&dtpm_cpu->dtpm, &dtpm_ops);
 	dtpm_cpu->cpu = cpu;
@@ -220,6 +222,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
 	if (ret)
 		goto out_dtpm_unregister;
 
+	cpufreq_cpu_put(policy);
 	return 0;
 
 out_dtpm_unregister:
@@ -231,6 +234,8 @@ out_kfree_dtpm_cpu:
 		per_cpu(dtpm_per_cpu, cpu) = NULL;
 	kfree(dtpm_cpu);
 
+release_policy:
+	cpufreq_cpu_put(policy);
 	return ret;
 }
 
diff --git a/drivers/powercap/dtpm_devfreq.c b/drivers/powercap/dtpm_devfreq.c
index 91276761a31d..612c3b59dd5b 100644
--- a/drivers/powercap/dtpm_devfreq.c
+++ b/drivers/powercap/dtpm_devfreq.c
@@ -39,10 +39,8 @@ static int update_pd_power_uw(struct dtpm *dtpm)
 	struct em_perf_domain *pd = em_pd_get(dev);
 
 	dtpm->power_min = pd->table[0].power;
-	dtpm->power_min *= MICROWATT_PER_MILLIWATT;
 
 	dtpm->power_max = pd->table[pd->nr_perf_states - 1].power;
-	dtpm->power_max *= MICROWATT_PER_MILLIWATT;
 
 	return 0;
 }
@@ -54,13 +52,10 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
 	struct device *dev = devfreq->dev.parent;
 	struct em_perf_domain *pd = em_pd_get(dev);
 	unsigned long freq;
-	u64 power;
 	int i;
 
 	for (i = 0; i < pd->nr_perf_states; i++) {
-
-		power = pd->table[i].power * MICROWATT_PER_MILLIWATT;
-		if (power > power_limit)
+		if (pd->table[i].power > power_limit)
 			break;
 	}
 
@@ -68,7 +63,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
 
 	dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq);
 
-	power_limit = pd->table[i - 1].power * MICROWATT_PER_MILLIWATT;
+	power_limit = pd->table[i - 1].power;
 
 	return power_limit;
 }
@@ -110,7 +105,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
 		if (pd->table[i].frequency < freq)
 			continue;
 
-		power = pd->table[i].power * MICROWATT_PER_MILLIWATT;
+		power = pd->table[i].power;
 		power *= status.busy_time;
 		power >>= 10;
 
diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c
index 9777babd5b95..ab30667f4f95 100644
--- a/drivers/pwm/pwm-bcm2835.c
+++ b/drivers/pwm/pwm-bcm2835.c
@@ -155,6 +155,8 @@ static int bcm2835_pwm_probe(struct platform_device *pdev)
 	pc->chip.ops = &bcm2835_pwm_ops;
 	pc->chip.npwm = 2;
 
+	platform_set_drvdata(pdev, pc);
+
 	ret = devm_pwmchip_add(&pdev->dev, &pc->chip);
 	if (ret < 0)
 		return dev_err_probe(&pdev->dev, ret,
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index e48f14ad6dfd..06acb5ff609e 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -2710,6 +2710,7 @@ init_wrb_hndl_failed:
 		kfree(pwrb_context->pwrb_handle_base);
 		kfree(pwrb_context->pwrb_handle_basestd);
 	}
+	kfree(phwi_ctxt->be_wrbq);
 	return -ENOMEM;
 }
 
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fa00dd503cbf..542a4bbb21bc 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3949,8 +3949,15 @@ static int sd_resume(struct device *dev, bool runtime)
 
 static int sd_resume_system(struct device *dev)
 {
-	if (pm_runtime_suspended(dev))
+	if (pm_runtime_suspended(dev)) {
+		struct scsi_disk *sdkp = dev_get_drvdata(dev);
+		struct scsi_device *sdp = sdkp ? sdkp->device : NULL;
+
+		if (sdp && sdp->force_runtime_start_on_system_start)
+			pm_request_resume(dev);
+
 		return 0;
+	}
 
 	return sd_resume(dev, false);
 }
diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c
index 64f0e047c23d..4b1092127694 100644
--- a/drivers/tee/optee/device.c
+++ b/drivers/tee/optee/device.c
@@ -60,7 +60,16 @@ static void optee_release_device(struct device *dev)
 	kfree(optee_device);
 }
 
-static int optee_register_device(const uuid_t *device_uuid)
+static ssize_t need_supplicant_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	return 0;
+}
+
+static DEVICE_ATTR_RO(need_supplicant);
+
+static int optee_register_device(const uuid_t *device_uuid, u32 func)
 {
 	struct tee_client_device *optee_device = NULL;
 	int rc;
@@ -83,6 +92,10 @@ static int optee_register_device(const uuid_t *device_uuid)
 		put_device(&optee_device->dev);
 	}
 
+	if (func == PTA_CMD_GET_DEVICES_SUPP)
+		device_create_file(&optee_device->dev,
+				   &dev_attr_need_supplicant);
+
 	return rc;
 }
 
@@ -142,7 +155,7 @@ static int __optee_enumerate_devices(u32 func)
 	num_devices = shm_size / sizeof(uuid_t);
 
 	for (idx = 0; idx < num_devices; idx++) {
-		rc = optee_register_device(&device_uuid[idx]);
+		rc = optee_register_device(&device_uuid[idx], func);
 		if (rc)
 			goto out_shm;
 	}
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index b94f567647cb..e6218766d0c8 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -777,6 +777,7 @@ static const struct acpi_device_id dw8250_acpi_match[] = {
 	{ "INT33C5", (kernel_ulong_t)&dw8250_dw_apb },
 	{ "INT3434", (kernel_ulong_t)&dw8250_dw_apb },
 	{ "INT3435", (kernel_ulong_t)&dw8250_dw_apb },
+	{ "INTC10EE", (kernel_ulong_t)&dw8250_dw_apb },
 	{ },
 };
 MODULE_DEVICE_TABLE(acpi, dw8250_acpi_match);
diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c
index 9837a27739fd..e3f482fd3de4 100644
--- a/drivers/tty/serial/8250/8250_early.c
+++ b/drivers/tty/serial/8250/8250_early.c
@@ -189,5 +189,6 @@ static int __init early_omap8250_setup(struct earlycon_device *device,
 OF_EARLYCON_DECLARE(omap8250, "ti,omap2-uart", early_omap8250_setup);
 OF_EARLYCON_DECLARE(omap8250, "ti,omap3-uart", early_omap8250_setup);
 OF_EARLYCON_DECLARE(omap8250, "ti,omap4-uart", early_omap8250_setup);
+OF_EARLYCON_DECLARE(omap8250, "ti,am654-uart", early_omap8250_setup);
 
 #endif
diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index 2d42f485c987..578f35895b27 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -933,7 +933,7 @@ static void __dma_rx_do_complete(struct uart_8250_port *p)
 	if (priv->habit & UART_HAS_RHR_IT_DIS) {
 		reg = serial_in(p, UART_OMAP_IER2);
 		reg &= ~UART_OMAP_IER2_RHR_IT_DIS;
-		serial_out(p, UART_OMAP_IER2, UART_OMAP_IER2_RHR_IT_DIS);
+		serial_out(p, UART_OMAP_IER2, reg);
 	}
 
 	dmaengine_tx_status(rxchan, cookie, &state);
@@ -1079,7 +1079,7 @@ static int omap_8250_rx_dma(struct uart_8250_port *p)
 	if (priv->habit & UART_HAS_RHR_IT_DIS) {
 		reg = serial_in(p, UART_OMAP_IER2);
 		reg |= UART_OMAP_IER2_RHR_IT_DIS;
-		serial_out(p, UART_OMAP_IER2, UART_OMAP_IER2_RHR_IT_DIS);
+		serial_out(p, UART_OMAP_IER2, reg);
 	}
 
 	dma_async_issue_pending(dma->rxchan);
@@ -1298,10 +1298,12 @@ static int omap_8250_dma_handle_irq(struct uart_port *port)
 
 	status = serial_port_in(port, UART_LSR);
 
-	if (priv->habit & UART_HAS_EFR2)
-		am654_8250_handle_rx_dma(up, iir, status);
-	else
-		status = omap_8250_handle_rx_dma(up, iir, status);
+	if ((iir & 0x3f) != UART_IIR_THRI) {
+		if (priv->habit & UART_HAS_EFR2)
+			am654_8250_handle_rx_dma(up, iir, status);
+		else
+			status = omap_8250_handle_rx_dma(up, iir, status);
+	}
 
 	serial8250_modem_status(up);
 	if (status & UART_LSR_THRE && up->dma->tx_err) {
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 61cc24cd90e4..b7635363373e 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -218,17 +218,18 @@ static struct vendor_data vendor_st = {
 
 /* Deals with DMA transactions */
 
-struct pl011_sgbuf {
-	struct scatterlist sg;
-	char *buf;
+struct pl011_dmabuf {
+	dma_addr_t		dma;
+	size_t			len;
+	char			*buf;
 };
 
 struct pl011_dmarx_data {
 	struct dma_chan		*chan;
 	struct completion	complete;
 	bool			use_buf_b;
-	struct pl011_sgbuf	sgbuf_a;
-	struct pl011_sgbuf	sgbuf_b;
+	struct pl011_dmabuf	dbuf_a;
+	struct pl011_dmabuf	dbuf_b;
 	dma_cookie_t		cookie;
 	bool			running;
 	struct timer_list	timer;
@@ -241,7 +242,8 @@ struct pl011_dmarx_data {
 
 struct pl011_dmatx_data {
 	struct dma_chan		*chan;
-	struct scatterlist	sg;
+	dma_addr_t		dma;
+	size_t			len;
 	char			*buf;
 	bool			queued;
 };
@@ -366,32 +368,24 @@ static int pl011_fifo_to_tty(struct uart_amba_port *uap)
 
 #define PL011_DMA_BUFFER_SIZE PAGE_SIZE
 
-static int pl011_sgbuf_init(struct dma_chan *chan, struct pl011_sgbuf *sg,
+static int pl011_dmabuf_init(struct dma_chan *chan, struct pl011_dmabuf *db,
 	enum dma_data_direction dir)
 {
-	dma_addr_t dma_addr;
-
-	sg->buf = dma_alloc_coherent(chan->device->dev,
-		PL011_DMA_BUFFER_SIZE, &dma_addr, GFP_KERNEL);
-	if (!sg->buf)
+	db->buf = dma_alloc_coherent(chan->device->dev, PL011_DMA_BUFFER_SIZE,
+				     &db->dma, GFP_KERNEL);
+	if (!db->buf)
 		return -ENOMEM;
-
-	sg_init_table(&sg->sg, 1);
-	sg_set_page(&sg->sg, phys_to_page(dma_addr),
-		PL011_DMA_BUFFER_SIZE, offset_in_page(dma_addr));
-	sg_dma_address(&sg->sg) = dma_addr;
-	sg_dma_len(&sg->sg) = PL011_DMA_BUFFER_SIZE;
+	db->len = PL011_DMA_BUFFER_SIZE;
 
 	return 0;
 }
 
-static void pl011_sgbuf_free(struct dma_chan *chan, struct pl011_sgbuf *sg,
+static void pl011_dmabuf_free(struct dma_chan *chan, struct pl011_dmabuf *db,
 	enum dma_data_direction dir)
 {
-	if (sg->buf) {
+	if (db->buf) {
 		dma_free_coherent(chan->device->dev,
-			PL011_DMA_BUFFER_SIZE, sg->buf,
-			sg_dma_address(&sg->sg));
+				  PL011_DMA_BUFFER_SIZE, db->buf, db->dma);
 	}
 }
 
@@ -552,8 +546,8 @@ static void pl011_dma_tx_callback(void *data)
 
 	uart_port_lock_irqsave(&uap->port, &flags);
 	if (uap->dmatx.queued)
-		dma_unmap_sg(dmatx->chan->device->dev, &dmatx->sg, 1,
-			     DMA_TO_DEVICE);
+		dma_unmap_single(dmatx->chan->device->dev, dmatx->dma,
+				dmatx->len, DMA_TO_DEVICE);
 
 	dmacr = uap->dmacr;
 	uap->dmacr = dmacr & ~UART011_TXDMAE;
@@ -639,18 +633,19 @@ static int pl011_dma_tx_refill(struct uart_amba_port *uap)
 			memcpy(&dmatx->buf[first], &xmit->buf[0], second);
 	}
 
-	dmatx->sg.length = count;
-
-	if (dma_map_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE) != 1) {
+	dmatx->len = count;
+	dmatx->dma = dma_map_single(dma_dev->dev, dmatx->buf, count,
+				    DMA_TO_DEVICE);
+	if (dmatx->dma == DMA_MAPPING_ERROR) {
 		uap->dmatx.queued = false;
 		dev_dbg(uap->port.dev, "unable to map TX DMA\n");
 		return -EBUSY;
 	}
 
-	desc = dmaengine_prep_slave_sg(chan, &dmatx->sg, 1, DMA_MEM_TO_DEV,
+	desc = dmaengine_prep_slave_single(chan, dmatx->dma, dmatx->len, DMA_MEM_TO_DEV,
 					     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!desc) {
-		dma_unmap_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE);
+		dma_unmap_single(dma_dev->dev, dmatx->dma, dmatx->len, DMA_TO_DEVICE);
 		uap->dmatx.queued = false;
 		/*
 		 * If DMA cannot be used right now, we complete this
@@ -813,8 +808,8 @@ __acquires(&uap->port.lock)
 	dmaengine_terminate_async(uap->dmatx.chan);
 
 	if (uap->dmatx.queued) {
-		dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1,
-			     DMA_TO_DEVICE);
+		dma_unmap_single(uap->dmatx.chan->device->dev, uap->dmatx.dma,
+				 uap->dmatx.len, DMA_TO_DEVICE);
 		uap->dmatx.queued = false;
 		uap->dmacr &= ~UART011_TXDMAE;
 		pl011_write(uap->dmacr, uap, REG_DMACR);
@@ -828,15 +823,15 @@ static int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap)
 	struct dma_chan *rxchan = uap->dmarx.chan;
 	struct pl011_dmarx_data *dmarx = &uap->dmarx;
 	struct dma_async_tx_descriptor *desc;
-	struct pl011_sgbuf *sgbuf;
+	struct pl011_dmabuf *dbuf;
 
 	if (!rxchan)
 		return -EIO;
 
 	/* Start the RX DMA job */
-	sgbuf = uap->dmarx.use_buf_b ?
-		&uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a;
-	desc = dmaengine_prep_slave_sg(rxchan, &sgbuf->sg, 1,
+	dbuf = uap->dmarx.use_buf_b ?
+		&uap->dmarx.dbuf_b : &uap->dmarx.dbuf_a;
+	desc = dmaengine_prep_slave_single(rxchan, dbuf->dma, dbuf->len,
 					DMA_DEV_TO_MEM,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	/*
@@ -876,8 +871,8 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap,
 			       bool readfifo)
 {
 	struct tty_port *port = &uap->port.state->port;
-	struct pl011_sgbuf *sgbuf = use_buf_b ?
-		&uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a;
+	struct pl011_dmabuf *dbuf = use_buf_b ?
+		&uap->dmarx.dbuf_b : &uap->dmarx.dbuf_a;
 	int dma_count = 0;
 	u32 fifotaken = 0; /* only used for vdbg() */
 
@@ -886,7 +881,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap,
 
 	if (uap->dmarx.poll_rate) {
 		/* The data can be taken by polling */
-		dmataken = sgbuf->sg.length - dmarx->last_residue;
+		dmataken = dbuf->len - dmarx->last_residue;
 		/* Recalculate the pending size */
 		if (pending >= dmataken)
 			pending -= dmataken;
@@ -900,7 +895,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap,
 		 * Note that tty_insert_flip_buf() tries to take as many chars
 		 * as it can.
 		 */
-		dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken,
+		dma_count = tty_insert_flip_string(port, dbuf->buf + dmataken,
 				pending);
 
 		uap->port.icount.rx += dma_count;
@@ -911,7 +906,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap,
 
 	/* Reset the last_residue for Rx DMA poll */
 	if (uap->dmarx.poll_rate)
-		dmarx->last_residue = sgbuf->sg.length;
+		dmarx->last_residue = dbuf->len;
 
 	/*
 	 * Only continue with trying to read the FIFO if all DMA chars have
@@ -946,8 +941,8 @@ static void pl011_dma_rx_irq(struct uart_amba_port *uap)
 {
 	struct pl011_dmarx_data *dmarx = &uap->dmarx;
 	struct dma_chan *rxchan = dmarx->chan;
-	struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ?
-		&dmarx->sgbuf_b : &dmarx->sgbuf_a;
+	struct pl011_dmabuf *dbuf = dmarx->use_buf_b ?
+		&dmarx->dbuf_b : &dmarx->dbuf_a;
 	size_t pending;
 	struct dma_tx_state state;
 	enum dma_status dmastat;
@@ -969,7 +964,7 @@ static void pl011_dma_rx_irq(struct uart_amba_port *uap)
 	pl011_write(uap->dmacr, uap, REG_DMACR);
 	uap->dmarx.running = false;
 
-	pending = sgbuf->sg.length - state.residue;
+	pending = dbuf->len - state.residue;
 	BUG_ON(pending > PL011_DMA_BUFFER_SIZE);
 	/* Then we terminate the transfer - we now know our residue */
 	dmaengine_terminate_all(rxchan);
@@ -996,8 +991,8 @@ static void pl011_dma_rx_callback(void *data)
 	struct pl011_dmarx_data *dmarx = &uap->dmarx;
 	struct dma_chan *rxchan = dmarx->chan;
 	bool lastbuf = dmarx->use_buf_b;
-	struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ?
-		&dmarx->sgbuf_b : &dmarx->sgbuf_a;
+	struct pl011_dmabuf *dbuf = dmarx->use_buf_b ?
+		&dmarx->dbuf_b : &dmarx->dbuf_a;
 	size_t pending;
 	struct dma_tx_state state;
 	int ret;
@@ -1015,7 +1010,7 @@ static void pl011_dma_rx_callback(void *data)
 	 * the DMA irq handler. So we check the residue here.
 	 */
 	rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state);
-	pending = sgbuf->sg.length - state.residue;
+	pending = dbuf->len - state.residue;
 	BUG_ON(pending > PL011_DMA_BUFFER_SIZE);
 	/* Then we terminate the transfer - we now know our residue */
 	dmaengine_terminate_all(rxchan);
@@ -1067,16 +1062,16 @@ static void pl011_dma_rx_poll(struct timer_list *t)
 	unsigned long flags;
 	unsigned int dmataken = 0;
 	unsigned int size = 0;
-	struct pl011_sgbuf *sgbuf;
+	struct pl011_dmabuf *dbuf;
 	int dma_count;
 	struct dma_tx_state state;
 
-	sgbuf = dmarx->use_buf_b ? &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a;
+	dbuf = dmarx->use_buf_b ? &uap->dmarx.dbuf_b : &uap->dmarx.dbuf_a;
 	rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state);
 	if (likely(state.residue < dmarx->last_residue)) {
-		dmataken = sgbuf->sg.length - dmarx->last_residue;
+		dmataken = dbuf->len - dmarx->last_residue;
 		size = dmarx->last_residue - state.residue;
-		dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken,
+		dma_count = tty_insert_flip_string(port, dbuf->buf + dmataken,
 				size);
 		if (dma_count == size)
 			dmarx->last_residue =  state.residue;
@@ -1123,7 +1118,7 @@ static void pl011_dma_startup(struct uart_amba_port *uap)
 		return;
 	}
 
-	sg_init_one(&uap->dmatx.sg, uap->dmatx.buf, PL011_DMA_BUFFER_SIZE);
+	uap->dmatx.len = PL011_DMA_BUFFER_SIZE;
 
 	/* The DMA buffer is now the FIFO the TTY subsystem can use */
 	uap->port.fifosize = PL011_DMA_BUFFER_SIZE;
@@ -1133,7 +1128,7 @@ static void pl011_dma_startup(struct uart_amba_port *uap)
 		goto skip_rx;
 
 	/* Allocate and map DMA RX buffers */
-	ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_a,
+	ret = pl011_dmabuf_init(uap->dmarx.chan, &uap->dmarx.dbuf_a,
 			       DMA_FROM_DEVICE);
 	if (ret) {
 		dev_err(uap->port.dev, "failed to init DMA %s: %d\n",
@@ -1141,12 +1136,12 @@ static void pl011_dma_startup(struct uart_amba_port *uap)
 		goto skip_rx;
 	}
 
-	ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_b,
+	ret = pl011_dmabuf_init(uap->dmarx.chan, &uap->dmarx.dbuf_b,
 			       DMA_FROM_DEVICE);
 	if (ret) {
 		dev_err(uap->port.dev, "failed to init DMA %s: %d\n",
 			"RX buffer B", ret);
-		pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a,
+		pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_a,
 				 DMA_FROM_DEVICE);
 		goto skip_rx;
 	}
@@ -1200,8 +1195,9 @@ static void pl011_dma_shutdown(struct uart_amba_port *uap)
 		/* In theory, this should already be done by pl011_dma_flush_buffer */
 		dmaengine_terminate_all(uap->dmatx.chan);
 		if (uap->dmatx.queued) {
-			dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1,
-				     DMA_TO_DEVICE);
+			dma_unmap_single(uap->dmatx.chan->device->dev,
+					 uap->dmatx.dma, uap->dmatx.len,
+					 DMA_TO_DEVICE);
 			uap->dmatx.queued = false;
 		}
 
@@ -1212,8 +1208,8 @@ static void pl011_dma_shutdown(struct uart_amba_port *uap)
 	if (uap->using_rx_dma) {
 		dmaengine_terminate_all(uap->dmarx.chan);
 		/* Clean up the RX DMA */
-		pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, DMA_FROM_DEVICE);
-		pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_b, DMA_FROM_DEVICE);
+		pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_a, DMA_FROM_DEVICE);
+		pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_b, DMA_FROM_DEVICE);
 		if (uap->dmarx.poll_rate)
 			del_timer_sync(&uap->dmarx.timer);
 		uap->using_rx_dma = false;
diff --git a/drivers/tty/serial/ma35d1_serial.c b/drivers/tty/serial/ma35d1_serial.c
index a6a7c405892e..21b574f78b86 100644
--- a/drivers/tty/serial/ma35d1_serial.c
+++ b/drivers/tty/serial/ma35d1_serial.c
@@ -552,11 +552,19 @@ static void ma35d1serial_console_putchar(struct uart_port *port, unsigned char c
  */
 static void ma35d1serial_console_write(struct console *co, const char *s, u32 count)
 {
-	struct uart_ma35d1_port *up = &ma35d1serial_ports[co->index];
+	struct uart_ma35d1_port *up;
 	unsigned long flags;
 	int locked = 1;
 	u32 ier;
 
+	if ((co->index < 0) || (co->index >= MA35_UART_NR)) {
+		pr_warn("Failed to write on ononsole port %x, out of range\n",
+			co->index);
+		return;
+	}
+
+	up = &ma35d1serial_ports[co->index];
+
 	if (up->port.sysrq)
 		locked = 0;
 	else if (oops_in_progress)
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index db2bb1c0d36c..cf0c6120d30e 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -766,6 +766,18 @@ static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno)
 		case SC16IS7XX_IIR_RTOI_SRC:
 		case SC16IS7XX_IIR_XOFFI_SRC:
 			rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG);
+
+			/*
+			 * There is a silicon bug that makes the chip report a
+			 * time-out interrupt but no data in the FIFO. This is
+			 * described in errata section 18.1.4.
+			 *
+			 * When this happens, read one byte from the FIFO to
+			 * clear the interrupt.
+			 */
+			if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen)
+				rxlen = 1;
+
 			if (rxlen)
 				sc16is7xx_handle_rx(port, rxlen, iir);
 			break;
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 8b1031fb0a44..bce0d2a9a7f3 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -6444,11 +6444,24 @@ static bool ufshcd_abort_one(struct request *rq, void *priv)
 	struct scsi_device *sdev = cmd->device;
 	struct Scsi_Host *shost = sdev->host;
 	struct ufs_hba *hba = shost_priv(shost);
+	struct ufshcd_lrb *lrbp = &hba->lrb[tag];
+	struct ufs_hw_queue *hwq;
+	unsigned long flags;
 
 	*ret = ufshcd_try_to_abort_task(hba, tag);
 	dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
 		hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
 		*ret ? "failed" : "succeeded");
+
+	/* Release cmd in MCQ mode if abort succeeds */
+	if (is_mcq_enabled(hba) && (*ret == 0)) {
+		hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
+		spin_lock_irqsave(&hwq->cq_lock, flags);
+		if (ufshcd_cmd_inflight(lrbp->cmd))
+			ufshcd_release_scsi_cmd(hba, lrbp);
+		spin_unlock_irqrestore(&hwq->cq_lock, flags);
+	}
+
 	return *ret == 0;
 }
 
diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index ea85e2c701a1..3c8a9dd585c0 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -92,6 +92,7 @@ static void hidg_release(struct device *dev)
 {
 	struct f_hidg *hidg = container_of(dev, struct f_hidg, dev);
 
+	kfree(hidg->report_desc);
 	kfree(hidg->set_report_buf);
 	kfree(hidg);
 }
@@ -1287,9 +1288,9 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi)
 	hidg->report_length = opts->report_length;
 	hidg->report_desc_length = opts->report_desc_length;
 	if (opts->report_desc) {
-		hidg->report_desc = devm_kmemdup(&hidg->dev, opts->report_desc,
-						 opts->report_desc_length,
-						 GFP_KERNEL);
+		hidg->report_desc = kmemdup(opts->report_desc,
+					    opts->report_desc_length,
+					    GFP_KERNEL);
 		if (!hidg->report_desc) {
 			ret = -ENOMEM;
 			goto err_put_device;
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index ded9531f141b..d59f94464b87 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1646,8 +1646,6 @@ static void gadget_unbind_driver(struct device *dev)
 
 	dev_dbg(&udc->dev, "unbinding gadget driver [%s]\n", driver->function);
 
-	kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE);
-
 	udc->allow_connect = false;
 	cancel_work_sync(&udc->vbus_work);
 	mutex_lock(&udc->connect_lock);
@@ -1667,6 +1665,8 @@ static void gadget_unbind_driver(struct device *dev)
 	driver->is_bound = false;
 	udc->driver = NULL;
 	mutex_unlock(&udc_lock);
+
+	kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE);
 }
 
 /* ------------------------------------------------------------------------- */
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 95ed9404f6f8..d6fc08e5db8f 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -535,8 +535,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	/* xHC spec requires PCI devices to support D3hot and D3cold */
 	if (xhci->hci_version >= 0x120)
 		xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
-	else if (pdev->vendor == PCI_VENDOR_ID_AMD && xhci->hci_version >= 0x110)
-		xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
 
 	if (xhci->quirks & XHCI_RESET_ON_RESUME)
 		xhci_dbg_trace(xhci, trace_xhci_dbg_quirks,
diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 2e0451bd336e..16a670828dde 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -267,7 +267,7 @@ static void typec_altmode_put_partner(struct altmode *altmode)
 	if (!partner)
 		return;
 
-	adev = &partner->adev;
+	adev = &altmode->adev;
 
 	if (is_typec_plug(adev->dev.parent)) {
 		struct typec_plug *plug = to_typec_plug(adev->dev.parent);
@@ -497,7 +497,8 @@ static void typec_altmode_release(struct device *dev)
 {
 	struct altmode *alt = to_altmode(to_typec_altmode(dev));
 
-	typec_altmode_put_partner(alt);
+	if (!is_typec_port(dev->parent))
+		typec_altmode_put_partner(alt);
 
 	altmode_id_remove(alt->adev.dev.parent, alt->id);
 	kfree(alt);
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 12ac3397f39b..26ba7da6b410 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2815,13 +2815,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
 	struct mlx5_control_vq *cvq = &mvdev->cvq;
 	int err = 0;
 
-	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
+	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
+		u16 idx = cvq->vring.last_avail_idx;
+
 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
 					MLX5_CVQ_MAX_ENT, false,
 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
 					(struct vring_used *)(uintptr_t)cvq->device_addr);
 
+		if (!err)
+			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
+	}
 	return err;
 }
 
diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c
index 9b04aad6ec35..c328e694f6e7 100644
--- a/drivers/vdpa/pds/debugfs.c
+++ b/drivers/vdpa/pds/debugfs.c
@@ -261,7 +261,7 @@ void pds_vdpa_debugfs_add_vdpadev(struct pds_vdpa_aux *vdpa_aux)
 	debugfs_create_file("config", 0400, vdpa_aux->dentry, vdpa_aux->pdsv, &config_fops);
 
 	for (i = 0; i < vdpa_aux->pdsv->num_vqs; i++) {
-		char name[8];
+		char name[16];
 
 		snprintf(name, sizeof(name), "vq%02d", i);
 		debugfs_create_file(name, 0400, vdpa_aux->dentry,
diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c
index 52b2449182ad..25c0fe5ec3d5 100644
--- a/drivers/vdpa/pds/vdpa_dev.c
+++ b/drivers/vdpa/pds/vdpa_dev.c
@@ -318,9 +318,8 @@ static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 featur
 		return -EOPNOTSUPP;
 	}
 
-	pdsv->negotiated_features = nego_features;
-
 	driver_features = pds_vdpa_get_driver_features(vdpa_dev);
+	pdsv->negotiated_features = nego_features;
 	dev_dbg(dev, "%s: %#llx => %#llx\n",
 		__func__, driver_features, nego_features);
 
@@ -461,8 +460,10 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
 
 	pds_vdpa_cmd_set_status(pdsv, status);
 
-	/* Note: still working with FW on the need for this reset cmd */
 	if (status == 0) {
+		struct vdpa_callback null_cb = { };
+
+		pds_vdpa_set_config_cb(vdpa_dev, &null_cb);
 		pds_vdpa_cmd_reset(pdsv);
 
 		for (i = 0; i < pdsv->num_vqs; i++) {
diff --git a/drivers/vfio/pci/pds/pci_drv.c b/drivers/vfio/pci/pds/pci_drv.c
index dd8c00c895a2..a34dda516629 100644
--- a/drivers/vfio/pci/pds/pci_drv.c
+++ b/drivers/vfio/pci/pds/pci_drv.c
@@ -55,10 +55,10 @@ static void pds_vfio_recovery(struct pds_vfio_pci_device *pds_vfio)
 	 * VFIO_DEVICE_STATE_RUNNING.
 	 */
 	if (deferred_reset_needed) {
-		spin_lock(&pds_vfio->reset_lock);
+		mutex_lock(&pds_vfio->reset_mutex);
 		pds_vfio->deferred_reset = true;
 		pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_ERROR;
-		spin_unlock(&pds_vfio->reset_lock);
+		mutex_unlock(&pds_vfio->reset_mutex);
 	}
 }
 
diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c
index 649b18ee394b..4c351c59d05a 100644
--- a/drivers/vfio/pci/pds/vfio_dev.c
+++ b/drivers/vfio/pci/pds/vfio_dev.c
@@ -29,7 +29,7 @@ struct pds_vfio_pci_device *pds_vfio_pci_drvdata(struct pci_dev *pdev)
 void pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio)
 {
 again:
-	spin_lock(&pds_vfio->reset_lock);
+	mutex_lock(&pds_vfio->reset_mutex);
 	if (pds_vfio->deferred_reset) {
 		pds_vfio->deferred_reset = false;
 		if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) {
@@ -39,23 +39,23 @@ again:
 		}
 		pds_vfio->state = pds_vfio->deferred_reset_state;
 		pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
-		spin_unlock(&pds_vfio->reset_lock);
+		mutex_unlock(&pds_vfio->reset_mutex);
 		goto again;
 	}
 	mutex_unlock(&pds_vfio->state_mutex);
-	spin_unlock(&pds_vfio->reset_lock);
+	mutex_unlock(&pds_vfio->reset_mutex);
 }
 
 void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio)
 {
-	spin_lock(&pds_vfio->reset_lock);
+	mutex_lock(&pds_vfio->reset_mutex);
 	pds_vfio->deferred_reset = true;
 	pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
 	if (!mutex_trylock(&pds_vfio->state_mutex)) {
-		spin_unlock(&pds_vfio->reset_lock);
+		mutex_unlock(&pds_vfio->reset_mutex);
 		return;
 	}
-	spin_unlock(&pds_vfio->reset_lock);
+	mutex_unlock(&pds_vfio->reset_mutex);
 	pds_vfio_state_mutex_unlock(pds_vfio);
 }
 
@@ -155,6 +155,9 @@ static int pds_vfio_init_device(struct vfio_device *vdev)
 
 	pds_vfio->vf_id = vf_id;
 
+	mutex_init(&pds_vfio->state_mutex);
+	mutex_init(&pds_vfio->reset_mutex);
+
 	vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P;
 	vdev->mig_ops = &pds_vfio_lm_ops;
 	vdev->log_ops = &pds_vfio_log_ops;
@@ -168,6 +171,17 @@ static int pds_vfio_init_device(struct vfio_device *vdev)
 	return 0;
 }
 
+static void pds_vfio_release_device(struct vfio_device *vdev)
+{
+	struct pds_vfio_pci_device *pds_vfio =
+		container_of(vdev, struct pds_vfio_pci_device,
+			     vfio_coredev.vdev);
+
+	mutex_destroy(&pds_vfio->state_mutex);
+	mutex_destroy(&pds_vfio->reset_mutex);
+	vfio_pci_core_release_dev(vdev);
+}
+
 static int pds_vfio_open_device(struct vfio_device *vdev)
 {
 	struct pds_vfio_pci_device *pds_vfio =
@@ -179,7 +193,6 @@ static int pds_vfio_open_device(struct vfio_device *vdev)
 	if (err)
 		return err;
 
-	mutex_init(&pds_vfio->state_mutex);
 	pds_vfio->state = VFIO_DEVICE_STATE_RUNNING;
 	pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
 
@@ -199,14 +212,13 @@ static void pds_vfio_close_device(struct vfio_device *vdev)
 	pds_vfio_put_save_file(pds_vfio);
 	pds_vfio_dirty_disable(pds_vfio, true);
 	mutex_unlock(&pds_vfio->state_mutex);
-	mutex_destroy(&pds_vfio->state_mutex);
 	vfio_pci_core_close_device(vdev);
 }
 
 static const struct vfio_device_ops pds_vfio_ops = {
 	.name = "pds-vfio",
 	.init = pds_vfio_init_device,
-	.release = vfio_pci_core_release_dev,
+	.release = pds_vfio_release_device,
 	.open_device = pds_vfio_open_device,
 	.close_device = pds_vfio_close_device,
 	.ioctl = vfio_pci_core_ioctl,
diff --git a/drivers/vfio/pci/pds/vfio_dev.h b/drivers/vfio/pci/pds/vfio_dev.h
index b8f2d667608f..e7b01080a1ec 100644
--- a/drivers/vfio/pci/pds/vfio_dev.h
+++ b/drivers/vfio/pci/pds/vfio_dev.h
@@ -18,7 +18,7 @@ struct pds_vfio_pci_device {
 	struct pds_vfio_dirty dirty;
 	struct mutex state_mutex; /* protect migration state */
 	enum vfio_device_mig_state state;
-	spinlock_t reset_lock; /* protect reset_done flow */
+	struct mutex reset_mutex; /* protect reset_done flow */
 	u8 deferred_reset;
 	enum vfio_device_mig_state deferred_reset_state;
 	struct notifier_block nb;
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index f5edb9e27e3c..b8cfea7812d6 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1110,8 +1110,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 
 	for (i = 0; i < nvec; i++) {
 		info = xen_irq_init(irq + i);
-		if (!info)
+		if (!info) {
+			ret = -ENOMEM;
 			goto error_irq;
+		}
 
 		irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
 
diff --git a/fs/Kconfig b/fs/Kconfig
index fd1f655b4f1f..42837617a55b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -268,6 +268,7 @@ config HUGETLBFS
 
 config HUGETLB_PAGE
 	def_bool HUGETLBFS
+	select XARRAY_MULTI
 
 config HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 	def_bool HUGETLB_PAGE
diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
index c08c2c7d6fbb..fddc7be58022 100644
--- a/fs/bcachefs/Kconfig
+++ b/fs/bcachefs/Kconfig
@@ -33,6 +33,18 @@ config BCACHEFS_QUOTA
 	depends on BCACHEFS_FS
 	select QUOTACTL
 
+config BCACHEFS_ERASURE_CODING
+	bool "bcachefs erasure coding (RAID5/6) support (EXPERIMENTAL)"
+	depends on BCACHEFS_FS
+	select QUOTACTL
+	help
+	This enables the "erasure_code" filesysystem and inode option, which
+	organizes data into reed-solomon stripes instead of ordinary
+	replication.
+
+	WARNING: this feature is still undergoing on disk format changes, and
+	should only be enabled for testing purposes.
+
 config BCACHEFS_POSIX_ACL
 	bool "bcachefs POSIX ACL support"
 	depends on BCACHEFS_FS
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index b85c7765272f..1ba0eeb7552a 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1297,6 +1297,30 @@ out:
 	return wp;
 }
 
+static noinline void
+deallocate_extra_replicas(struct bch_fs *c,
+			  struct open_buckets *ptrs,
+			  struct open_buckets *ptrs_no_use,
+			  unsigned extra_replicas)
+{
+	struct open_buckets ptrs2 = { 0 };
+	struct open_bucket *ob;
+	unsigned i;
+
+	open_bucket_for_each(c, ptrs, ob, i) {
+		unsigned d = bch_dev_bkey_exists(c, ob->dev)->mi.durability;
+
+		if (d && d <= extra_replicas) {
+			extra_replicas -= d;
+			ob_push(c, ptrs_no_use, ob);
+		} else {
+			ob_push(c, &ptrs2, ob);
+		}
+	}
+
+	*ptrs = ptrs2;
+}
+
 /*
  * Get us an open_bucket we can allocate from, return with it locked:
  */
@@ -1321,6 +1345,9 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
 	int ret;
 	int i;
 
+	if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
+		erasure_code = false;
+
 	BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
 
 	BUG_ON(!nr_replicas || !nr_replicas_required);
@@ -1382,6 +1409,9 @@ alloc_done:
 	if (ret)
 		goto err;
 
+	if (nr_effective > nr_replicas)
+		deallocate_extra_replicas(c, &ptrs, &wp->ptrs, nr_effective - nr_replicas);
+
 	/* Free buckets we didn't use: */
 	open_bucket_for_each(c, &wp->ptrs, ob, i)
 		open_bucket_free_unused(c, ob);
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 403aa3389fcc..dfa22f9d9a1d 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -638,6 +638,8 @@ struct journal_keys {
 	size_t			gap;
 	size_t			nr;
 	size_t			size;
+	atomic_t		ref;
+	bool			initial_ref_held;
 };
 
 struct btree_trans_buf {
@@ -929,7 +931,7 @@ struct bch_fs {
 	mempool_t		compression_bounce[2];
 	mempool_t		compress_workspace[BCH_COMPRESSION_TYPE_NR];
 	mempool_t		decompress_workspace;
-	ZSTD_parameters		zstd_params;
+	size_t			zstd_workspace_size;
 
 	struct crypto_shash	*sha256;
 	struct crypto_sync_skcipher *chacha20;
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 0a750953ff92..1ab1f08d763b 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -151,7 +151,11 @@ struct bpos {
 #else
 #error edit for your odd byteorder.
 #endif
-} __packed __aligned(4);
+} __packed
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+__aligned(4)
+#endif
+;
 
 #define KEY_INODE_MAX			((__u64)~0ULL)
 #define KEY_OFFSET_MAX			((__u64)~0ULL)
@@ -1528,7 +1532,7 @@ struct bch_sb_field_disk_groups {
 	x(move_extent_write,				36)	\
 	x(move_extent_finish,				37)	\
 	x(move_extent_fail,				38)	\
-	x(move_extent_alloc_mem_fail,			39)	\
+	x(move_extent_start_fail,			39)	\
 	x(copygc,					40)	\
 	x(copygc_wait,					41)	\
 	x(gc_gens_end,					42)	\
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 0b5d09c8475d..30ab78a24517 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1541,8 +1541,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
 		rcu_assign_pointer(ca->buckets_gc, buckets);
 	}
 
-	for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
-			   BTREE_ITER_PREFETCH, k, ret) {
+	ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
+				  BTREE_ITER_PREFETCH, k, ({
 		ca = bch_dev_bkey_exists(c, k.k->p.inode);
 		g = gc_bucket(ca, k.k->p.offset);
 
@@ -1561,8 +1561,9 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
 			g->stripe		= a->stripe;
 			g->stripe_redundancy	= a->stripe_redundancy;
 		}
-	}
-	bch2_trans_iter_exit(trans, &iter);
+
+		0;
+	}));
 err:
 	bch2_trans_put(trans);
 	if (ret)
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 37d896edb06e..57c20390e10e 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1358,10 +1358,9 @@ static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void *
 	return offset;
 }
 
-static void btree_node_read_all_replicas_done(struct closure *cl)
+static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
 {
-	struct btree_node_read_all *ra =
-		container_of(cl, struct btree_node_read_all, cl);
+	closure_type(ra, struct btree_node_read_all, cl);
 	struct bch_fs *c = ra->c;
 	struct btree *b = ra->b;
 	struct printbuf buf = PRINTBUF;
@@ -1567,7 +1566,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
 
 	if (sync) {
 		closure_sync(&ra->cl);
-		btree_node_read_all_replicas_done(&ra->cl);
+		btree_node_read_all_replicas_done(&ra->cl.work);
 	} else {
 		continue_at(&ra->cl, btree_node_read_all_replicas_done,
 			    c->io_complete_wq);
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 6fa90bcd7016..8e0fe65f6101 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2981,7 +2981,8 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
 	trans->fn_idx		= fn_idx;
 	trans->locking_wait.task = current;
 	trans->journal_replay_not_finished =
-		!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
+		unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) &&
+		atomic_inc_not_zero(&c->journal_keys.ref);
 	closure_init_stack(&trans->ref);
 
 	s = btree_trans_stats(trans);
@@ -3098,6 +3099,9 @@ void bch2_trans_put(struct btree_trans *trans)
 			kfree(trans->fs_usage_deltas);
 	}
 
+	if (unlikely(trans->journal_replay_not_finished))
+		bch2_journal_keys_put(c);
+
 	if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
 		mempool_free(trans->mem, &c->btree_trans_mem_pool);
 	else
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index 58a981bcf3aa..ec52f50d249d 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -80,6 +80,8 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree
 	struct journal_keys *keys = &c->journal_keys;
 	unsigned iters = 0;
 	struct journal_key *k;
+
+	BUG_ON(*idx > keys->nr);
 search:
 	if (!*idx)
 		*idx = __bch2_journal_key_search(keys, btree_id, level, pos);
@@ -189,10 +191,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
 		/* Since @keys was full, there was no gap: */
 		memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr);
 		kvfree(keys->d);
-		*keys = new_keys;
+		keys->d		= new_keys.d;
+		keys->nr	= new_keys.nr;
+		keys->size	= new_keys.size;
 
 		/* And now the gap is at the end: */
-		keys->gap = keys->nr;
+		keys->gap	= keys->nr;
 	}
 
 	journal_iters_move_gap(c, keys->gap, idx);
@@ -415,10 +419,16 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
 		cmp_int(l->journal_offset, r->journal_offset);
 }
 
-void bch2_journal_keys_free(struct journal_keys *keys)
+void bch2_journal_keys_put(struct bch_fs *c)
 {
+	struct journal_keys *keys = &c->journal_keys;
 	struct journal_key *i;
 
+	BUG_ON(atomic_read(&keys->ref) <= 0);
+
+	if (!atomic_dec_and_test(&keys->ref))
+		return;
+
 	move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
 	keys->gap = keys->nr;
 
@@ -429,6 +439,8 @@ void bch2_journal_keys_free(struct journal_keys *keys)
 	kvfree(keys->d);
 	keys->d = NULL;
 	keys->nr = keys->gap = keys->size = 0;
+
+	bch2_journal_entries_free(c);
 }
 
 static void __journal_keys_sort(struct journal_keys *keys)
diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h
index 5d64e7e22f26..8ca4c100b2e3 100644
--- a/fs/bcachefs/btree_journal_iter.h
+++ b/fs/bcachefs/btree_journal_iter.h
@@ -49,7 +49,15 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
 						struct bch_fs *,
 						struct btree *);
 
-void bch2_journal_keys_free(struct journal_keys *);
+void bch2_journal_keys_put(struct bch_fs *);
+
+static inline void bch2_journal_keys_put_initial(struct bch_fs *c)
+{
+	if (c->journal_keys.initial_ref_held)
+		bch2_journal_keys_put(c);
+	c->journal_keys.initial_ref_held = false;
+}
+
 void bch2_journal_entries_free(struct bch_fs *);
 
 int bch2_journal_keys_sort(struct bch_fs *);
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 76f27bc9fa24..6697417273aa 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -778,9 +778,9 @@ static void btree_interior_update_work(struct work_struct *work)
 	}
 }
 
-static void btree_update_set_nodes_written(struct closure *cl)
+static CLOSURE_CALLBACK(btree_update_set_nodes_written)
 {
-	struct btree_update *as = container_of(cl, struct btree_update, cl);
+	closure_type(as, struct btree_update, cl);
 	struct bch_fs *c = as->c;
 
 	mutex_lock(&c->btree_interior_update_lock);
@@ -1071,8 +1071,12 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 			break;
 		}
 
+		/*
+		 * Always check for space for two keys, even if we won't have to
+		 * split at prior level - it might have been a merge instead:
+		 */
 		if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
-					BKEY_BTREE_PTR_U64s_MAX * (1 + split)))
+						BKEY_BTREE_PTR_U64s_MAX * 2))
 			break;
 
 		split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
@@ -2266,6 +2270,10 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
 
 	BUG_ON(!btree_node_hashed(b));
 
+	struct bch_extent_ptr *ptr;
+	bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr,
+			    !bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev));
+
 	ret = bch2_btree_node_update_key(trans, &iter, b, new_key,
 					 commit_flags, skip_triggers);
 out:
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 58d8c6ffd955..5a91d3189fcf 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -854,8 +854,12 @@ static int __mark_pointer(struct btree_trans *trans,
 		return ret;
 
 	*dst_sectors += sectors;
-	*bucket_data_type = *dirty_sectors || *cached_sectors
-		? ptr_data_type : 0;
+
+	if (!*dirty_sectors && !*cached_sectors)
+		*bucket_data_type = 0;
+	else if (*bucket_data_type != BCH_DATA_stripe)
+		*bucket_data_type = ptr_data_type;
+
 	return 0;
 }
 
@@ -2091,8 +2095,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 	bucket_gens->first_bucket = ca->mi.first_bucket;
 	bucket_gens->nbuckets	= nbuckets;
 
-	bch2_copygc_stop(c);
-
 	if (resize) {
 		down_write(&c->gc_lock);
 		down_write(&ca->bucket_lock);
diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c
index a8b148ec2a2b..51af8ea230ed 100644
--- a/fs/bcachefs/compress.c
+++ b/fs/bcachefs/compress.c
@@ -354,8 +354,7 @@ static int attempt_compress(struct bch_fs *c,
 		 */
 		unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
 		ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
-		ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
-			zstd_cctx_workspace_bound(&params.cParams));
+		ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size);
 
 		/*
 		 * ZSTD requires that when we decompress we pass in the exact
@@ -371,7 +370,7 @@ static int attempt_compress(struct bch_fs *c,
 		size_t len = zstd_compress_cctx(ctx,
 				dst + 4,	dst_len - 4 - 7,
 				src,		src_len,
-				&c->zstd_params);
+				&params);
 		if (zstd_is_error(len))
 			return 0;
 
@@ -572,6 +571,13 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
 	size_t decompress_workspace_size = 0;
 	ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
 						 c->opts.encoded_extent_max);
+
+	/*
+	 * ZSTD is lying: if we allocate the size of the workspace it says it
+	 * requires, it returns memory allocation errors
+	 */
+	c->zstd_workspace_size = zstd_cctx_workspace_bound(&params.cParams);
+
 	struct {
 		unsigned			feature;
 		enum bch_compression_type	type;
@@ -585,13 +591,11 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
 			zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
 			zlib_inflate_workspacesize(), },
 		{ BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
-			zstd_cctx_workspace_bound(&params.cParams),
+			c->zstd_workspace_size,
 			zstd_dctx_workspace_bound() },
 	}, *i;
 	bool have_compressed = false;
 
-	c->zstd_params = params;
-
 	for (i = compression_types;
 	     i < compression_types + ARRAY_SIZE(compression_types);
 	     i++)
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 5ed66202c226..71aa5e59787b 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -356,7 +356,7 @@ void bch2_data_update_exit(struct data_update *update)
 	bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
 }
 
-void bch2_update_unwritten_extent(struct btree_trans *trans,
+static void bch2_update_unwritten_extent(struct btree_trans *trans,
 				  struct data_update *update)
 {
 	struct bch_fs *c = update->op.c;
@@ -436,7 +436,51 @@ void bch2_update_unwritten_extent(struct btree_trans *trans,
 	}
 }
 
+int bch2_extent_drop_ptrs(struct btree_trans *trans,
+			  struct btree_iter *iter,
+			  struct bkey_s_c k,
+			  struct data_update_opts data_opts)
+{
+	struct bch_fs *c = trans->c;
+	struct bkey_i *n;
+	int ret;
+
+	n = bch2_bkey_make_mut_noupdate(trans, k);
+	ret = PTR_ERR_OR_ZERO(n);
+	if (ret)
+		return ret;
+
+	while (data_opts.kill_ptrs) {
+		unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
+		struct bch_extent_ptr *ptr;
+
+		bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
+		data_opts.kill_ptrs ^= 1U << drop;
+	}
+
+	/*
+	 * If the new extent no longer has any pointers, bch2_extent_normalize()
+	 * will do the appropriate thing with it (turning it into a
+	 * KEY_TYPE_error key, or just a discard if it was a cached extent)
+	 */
+	bch2_extent_normalize(c, bkey_i_to_s(n));
+
+	/*
+	 * Since we're not inserting through an extent iterator
+	 * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+	 * we aren't using the extent overwrite path to delete, we're
+	 * just using the normal key deletion path:
+	 */
+	if (bkey_deleted(&n->k))
+		n->k.size = 0;
+
+	return bch2_trans_relock(trans) ?:
+		bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+		bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
+}
+
 int bch2_data_update_init(struct btree_trans *trans,
+			  struct btree_iter *iter,
 			  struct moving_context *ctxt,
 			  struct data_update *m,
 			  struct write_point_specifier wp,
@@ -452,7 +496,7 @@ int bch2_data_update_init(struct btree_trans *trans,
 	const struct bch_extent_ptr *ptr;
 	unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
 	unsigned ptrs_locked = 0;
-	int ret;
+	int ret = 0;
 
 	bch2_bkey_buf_init(&m->k);
 	bch2_bkey_buf_reassemble(&m->k, c, k);
@@ -478,6 +522,8 @@ int bch2_data_update_init(struct btree_trans *trans,
 	bkey_for_each_ptr(ptrs, ptr)
 		percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref);
 
+	unsigned durability_have = 0, durability_removing = 0;
+
 	i = 0;
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
 		bool locked;
@@ -489,8 +535,11 @@ int bch2_data_update_init(struct btree_trans *trans,
 				reserve_sectors += k.k->size;
 
 			m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
-		} else if (!p.ptr.cached) {
+			durability_removing += bch2_extent_ptr_desired_durability(c, &p);
+		} else if (!p.ptr.cached &&
+			   !((1U << i) & m->data_opts.kill_ptrs)) {
 			bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
+			durability_have += bch2_extent_ptr_durability(c, &p);
 		}
 
 		/*
@@ -529,6 +578,29 @@ int bch2_data_update_init(struct btree_trans *trans,
 		i++;
 	}
 
+	/*
+	 * If current extent durability is less than io_opts.data_replicas,
+	 * we're not trying to rereplicate the extent up to data_replicas here -
+	 * unless extra_replicas was specified
+	 *
+	 * Increasing replication is an explicit operation triggered by
+	 * rereplicate, currently, so that users don't get an unexpected -ENOSPC
+	 */
+	if (durability_have >= io_opts.data_replicas) {
+		m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
+		m->data_opts.rewrite_ptrs = 0;
+		/* if iter == NULL, it's just a promote */
+		if (iter)
+			ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts);
+		goto done;
+	}
+
+	m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
+		m->data_opts.extra_replicas;
+	m->op.nr_replicas_required = m->op.nr_replicas;
+
+	BUG_ON(!m->op.nr_replicas);
+
 	if (reserve_sectors) {
 		ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
 				m->data_opts.extra_replicas
@@ -538,14 +610,11 @@ int bch2_data_update_init(struct btree_trans *trans,
 			goto err;
 	}
 
-	m->op.nr_replicas += m->data_opts.extra_replicas;
-	m->op.nr_replicas_required = m->op.nr_replicas;
-
-	BUG_ON(!m->op.nr_replicas);
+	if (bkey_extent_is_unwritten(k)) {
+		bch2_update_unwritten_extent(trans, m);
+		goto done;
+	}
 
-	/* Special handling required: */
-	if (bkey_extent_is_unwritten(k))
-		return -BCH_ERR_unwritten_extent_update;
 	return 0;
 err:
 	i = 0;
@@ -560,6 +629,9 @@ err:
 	bch2_bkey_buf_exit(&m->k, c);
 	bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
 	return ret;
+done:
+	bch2_data_update_exit(m);
+	return ret ?: -BCH_ERR_data_update_done;
 }
 
 void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 9dc17b9d8379..991095bbd469 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -32,9 +32,14 @@ int bch2_data_update_index_update(struct bch_write_op *);
 void bch2_data_update_read_done(struct data_update *,
 				struct bch_extent_crc_unpacked);
 
+int bch2_extent_drop_ptrs(struct btree_trans *,
+			  struct btree_iter *,
+			  struct bkey_s_c,
+			  struct data_update_opts);
+
 void bch2_data_update_exit(struct data_update *);
-void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *);
-int bch2_data_update_init(struct btree_trans *, struct moving_context *,
+int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
+			  struct moving_context *,
 			  struct data_update *,
 			  struct write_point_specifier,
 			  struct bch_io_opts, struct data_update_opts,
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 68a1a96bb7ca..ae7910bf2228 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -162,7 +162,7 @@
 	x(BCH_ERR_fsck,			fsck_repair_unimplemented)		\
 	x(BCH_ERR_fsck,			fsck_repair_impossible)			\
 	x(0,				restart_recovery)			\
-	x(0,				unwritten_extent_update)		\
+	x(0,				data_update_done)			\
 	x(EINVAL,			device_state_not_allowed)		\
 	x(EINVAL,			member_info_missing)			\
 	x(EINVAL,			mismatched_block_size)			\
@@ -210,6 +210,7 @@
 	x(BCH_ERR_invalid_sb,		invalid_sb_members)			\
 	x(BCH_ERR_invalid_sb,		invalid_sb_disk_groups)			\
 	x(BCH_ERR_invalid_sb,		invalid_sb_replicas)			\
+	x(BCH_ERR_invalid_sb,		invalid_replicas_entry)			\
 	x(BCH_ERR_invalid_sb,		invalid_sb_journal)			\
 	x(BCH_ERR_invalid_sb,		invalid_sb_journal_seq_blacklist)	\
 	x(BCH_ERR_invalid_sb,		invalid_sb_crypt)			\
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index a864de231b69..f6c92df55270 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -649,37 +649,31 @@ unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
 	return replicas;
 }
 
-unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+static inline unsigned __extent_ptr_durability(struct bch_dev *ca, struct extent_ptr_decoded *p)
 {
-	struct bch_dev *ca;
-
 	if (p->ptr.cached)
 		return 0;
 
-	ca = bch_dev_bkey_exists(c, p->ptr.dev);
-
-	return ca->mi.durability +
-		(p->has_ec
-		 ? p->ec.redundancy
-		 : 0);
+	return p->has_ec
+		? p->ec.redundancy + 1
+		: ca->mi.durability;
 }
 
-unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
 {
-	struct bch_dev *ca;
+	struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
 
-	if (p->ptr.cached)
-		return 0;
+	return __extent_ptr_durability(ca, p);
+}
 
-	ca = bch_dev_bkey_exists(c, p->ptr.dev);
+unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+{
+	struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
 
 	if (ca->mi.state == BCH_MEMBER_STATE_failed)
 		return 0;
 
-	return ca->mi.durability +
-		(p->has_ec
-		 ? p->ec.redundancy
-		 : 0);
+	return __extent_ptr_durability(ca, p);
 }
 
 unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 5b42a76c4796..9a479e4de6b3 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -35,9 +35,9 @@ static void bio_check_or_release(struct bio *bio, bool check_dirty)
 	}
 }
 
-static void bch2_dio_read_complete(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_dio_read_complete)
 {
-	struct dio_read *dio = container_of(cl, struct dio_read, cl);
+	closure_type(dio, struct dio_read, cl);
 
 	dio->req->ki_complete(dio->req, dio->ret);
 	bio_check_or_release(&dio->rbio.bio, dio->should_dirty);
@@ -325,9 +325,9 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
 	return 0;
 }
 
-static void bch2_dio_write_flush_done(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_dio_write_flush_done)
 {
-	struct dio_write *dio = container_of(cl, struct dio_write, op.cl);
+	closure_type(dio, struct dio_write, op.cl);
 	struct bch_fs *c = dio->op.c;
 
 	closure_debug_destroy(cl);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 8ef817304e4a..4d51be813509 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1667,8 +1667,7 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
 		if (!first)
 			seq_putc(seq, ':');
 		first = false;
-		seq_puts(seq, "/dev/");
-		seq_puts(seq, ca->name);
+		seq_puts(seq, ca->disk_sb.sb_name);
 	}
 
 	return 0;
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index a56ed553dc15..36763865facd 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -209,7 +209,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
 	bio = &op->write.op.wbio.bio;
 	bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
 
-	ret = bch2_data_update_init(trans, NULL, &op->write,
+	ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
 			writepoint_hashed((unsigned long) current),
 			opts,
 			(struct data_update_opts) {
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index d704a8f829c8..8ede46b1e354 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -580,9 +580,9 @@ static inline void wp_update_state(struct write_point *wp, bool running)
 	__wp_update_state(wp, state);
 }
 
-static void bch2_write_index(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_write_index)
 {
-	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+	closure_type(op, struct bch_write_op, cl);
 	struct write_point *wp = op->wp;
 	struct workqueue_struct *wq = index_update_wq(op);
 	unsigned long flags;
@@ -1208,9 +1208,9 @@ static void __bch2_nocow_write_done(struct bch_write_op *op)
 		bch2_nocow_write_convert_unwritten(op);
 }
 
-static void bch2_nocow_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_nocow_write_done)
 {
-	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+	closure_type(op, struct bch_write_op, cl);
 
 	__bch2_nocow_write_done(op);
 	bch2_write_done(cl);
@@ -1363,7 +1363,7 @@ err:
 		op->insert_keys.top = op->insert_keys.keys;
 	} else if (op->flags & BCH_WRITE_SYNC) {
 		closure_sync(&op->cl);
-		bch2_nocow_write_done(&op->cl);
+		bch2_nocow_write_done(&op->cl.work);
 	} else {
 		/*
 		 * XXX
@@ -1566,9 +1566,9 @@ err:
  * If op->discard is true, instead of inserting the data it invalidates the
  * region of the cache represented by op->bio and op->inode.
  */
-void bch2_write(struct closure *cl)
+CLOSURE_CALLBACK(bch2_write)
 {
-	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+	closure_type(op, struct bch_write_op, cl);
 	struct bio *bio = &op->wbio.bio;
 	struct bch_fs *c = op->c;
 	unsigned data_len;
diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h
index 9323167229ee..6c276a48f95d 100644
--- a/fs/bcachefs/io_write.h
+++ b/fs/bcachefs/io_write.h
@@ -90,8 +90,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
 	op->devs_need_flush	= NULL;
 }
 
-void bch2_write(struct closure *);
-
+CLOSURE_CALLBACK(bch2_write);
 void bch2_write_point_do_index_updates(struct work_struct *);
 
 static inline struct bch_write_bio *wbio_init(struct bio *bio)
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 23a9b7845d11..489b34046e78 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -321,6 +321,8 @@ static int journal_entry_open(struct journal *j)
 	atomic64_inc(&j->seq);
 	journal_pin_list_init(fifo_push_ref(&j->pin), 1);
 
+	BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
+
 	BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf);
 
 	bkey_extent_init(&buf->key);
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index c85d01cf4948..4c513fca5ef2 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -136,9 +136,7 @@ static inline u64 journal_last_seq(struct journal *j)
 
 static inline u64 journal_cur_seq(struct journal *j)
 {
-	EBUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
-
-	return j->pin.back - 1;
+	return atomic64_read(&j->seq);
 }
 
 static inline u64 journal_last_unwritten_seq(struct journal *j)
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 786a09285509..0f17fc5f8d68 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -547,6 +547,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
 	struct jset_entry_data_usage *u =
 		container_of(entry, struct jset_entry_data_usage, entry);
 	unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+	struct printbuf err = PRINTBUF;
 	int ret = 0;
 
 	if (journal_entry_err_on(bytes < sizeof(*u) ||
@@ -555,10 +556,19 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
 				 journal_entry_data_usage_bad_size,
 				 "invalid journal entry usage: bad size")) {
 		journal_entry_null_range(entry, vstruct_next(entry));
-		return ret;
+		goto out;
 	}
 
+	if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err),
+				 c, version, jset, entry,
+				 journal_entry_data_usage_bad_size,
+				 "invalid journal entry usage: %s", err.buf)) {
+		journal_entry_null_range(entry, vstruct_next(entry));
+		goto out;
+	}
+out:
 fsck_err:
+	printbuf_exit(&err);
 	return ret;
 }
 
@@ -1025,10 +1035,9 @@ next_block:
 	return 0;
 }
 
-static void bch2_journal_read_device(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_journal_read_device)
 {
-	struct journal_device *ja =
-		container_of(cl, struct journal_device, read);
+	closure_type(ja, struct journal_device, read);
 	struct bch_dev *ca = container_of(ja, struct bch_dev, journal);
 	struct bch_fs *c = ca->fs;
 	struct journal_list *jlist =
@@ -1520,9 +1529,9 @@ static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)
 	return j->buf + (journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK);
 }
 
-static void journal_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_done)
 {
-	struct journal *j = container_of(cl, struct journal, io);
+	closure_type(j, struct journal, io);
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct journal_buf *w = journal_last_unwritten_buf(j);
 	struct bch_replicas_padded replicas;
@@ -1638,9 +1647,9 @@ static void journal_write_endio(struct bio *bio)
 	percpu_ref_put(&ca->io_ref);
 }
 
-static void do_journal_write(struct closure *cl)
+static CLOSURE_CALLBACK(do_journal_write)
 {
-	struct journal *j = container_of(cl, struct journal, io);
+	closure_type(j, struct journal, io);
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct bch_dev *ca;
 	struct journal_buf *w = journal_last_unwritten_buf(j);
@@ -1850,9 +1859,9 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *
 	return 0;
 }
 
-void bch2_journal_write(struct closure *cl)
+CLOSURE_CALLBACK(bch2_journal_write)
 {
-	struct journal *j = container_of(cl, struct journal, io);
+	closure_type(j, struct journal, io);
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct bch_dev *ca;
 	struct journal_buf *w = journal_last_unwritten_buf(j);
diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h
index a88d097b13f1..c035e7c108e1 100644
--- a/fs/bcachefs/journal_io.h
+++ b/fs/bcachefs/journal_io.h
@@ -60,6 +60,6 @@ void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
 
 int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);
 
-void bch2_journal_write(struct closure *);
+CLOSURE_CALLBACK(bch2_journal_write);
 
 #endif /* _BCACHEFS_JOURNAL_IO_H */
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index ab749bf2fcbc..54830ee0ed88 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -49,17 +49,6 @@ static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k)
 	}
 }
 
-static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c k)
-{
-	if (trace_move_extent_alloc_mem_fail_enabled()) {
-		struct printbuf buf = PRINTBUF;
-
-		bch2_bkey_val_to_text(&buf, c, k);
-		trace_move_extent_alloc_mem_fail(c, buf.buf);
-		printbuf_exit(&buf);
-	}
-}
-
 struct moving_io {
 	struct list_head		read_list;
 	struct list_head		io_list;
@@ -163,12 +152,18 @@ void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
 		atomic_read(&ctxt->write_sectors) != sectors_pending);
 }
 
+static void bch2_moving_ctxt_flush_all(struct moving_context *ctxt)
+{
+	move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
+	bch2_trans_unlock_long(ctxt->trans);
+	closure_sync(&ctxt->cl);
+}
+
 void bch2_moving_ctxt_exit(struct moving_context *ctxt)
 {
 	struct bch_fs *c = ctxt->trans->c;
 
-	move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
-	closure_sync(&ctxt->cl);
+	bch2_moving_ctxt_flush_all(ctxt);
 
 	EBUG_ON(atomic_read(&ctxt->write_sectors));
 	EBUG_ON(atomic_read(&ctxt->write_ios));
@@ -223,49 +218,6 @@ void bch2_move_stats_init(struct bch_move_stats *stats, char *name)
 	scnprintf(stats->name, sizeof(stats->name), "%s", name);
 }
 
-static int bch2_extent_drop_ptrs(struct btree_trans *trans,
-				 struct btree_iter *iter,
-				 struct bkey_s_c k,
-				 struct data_update_opts data_opts)
-{
-	struct bch_fs *c = trans->c;
-	struct bkey_i *n;
-	int ret;
-
-	n = bch2_bkey_make_mut_noupdate(trans, k);
-	ret = PTR_ERR_OR_ZERO(n);
-	if (ret)
-		return ret;
-
-	while (data_opts.kill_ptrs) {
-		unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
-		struct bch_extent_ptr *ptr;
-
-		bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
-		data_opts.kill_ptrs ^= 1U << drop;
-	}
-
-	/*
-	 * If the new extent no longer has any pointers, bch2_extent_normalize()
-	 * will do the appropriate thing with it (turning it into a
-	 * KEY_TYPE_error key, or just a discard if it was a cached extent)
-	 */
-	bch2_extent_normalize(c, bkey_i_to_s(n));
-
-	/*
-	 * Since we're not inserting through an extent iterator
-	 * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
-	 * we aren't using the extent overwrite path to delete, we're
-	 * just using the normal key deletion path:
-	 */
-	if (bkey_deleted(&n->k))
-		n->k.size = 0;
-
-	return bch2_trans_relock(trans) ?:
-		bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
-		bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
-}
-
 int bch2_move_extent(struct moving_context *ctxt,
 		     struct move_bucket_in_flight *bucket_in_flight,
 		     struct btree_iter *iter,
@@ -335,19 +287,11 @@ int bch2_move_extent(struct moving_context *ctxt,
 	io->rbio.bio.bi_iter.bi_sector	= bkey_start_offset(k.k);
 	io->rbio.bio.bi_end_io		= move_read_endio;
 
-	ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
+	ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
 				    io_opts, data_opts, iter->btree_id, k);
-	if (ret && ret != -BCH_ERR_unwritten_extent_update)
+	if (ret)
 		goto err_free_pages;
 
-	if (ret == -BCH_ERR_unwritten_extent_update) {
-		bch2_update_unwritten_extent(trans, &io->write);
-		move_free(io);
-		return 0;
-	}
-
-	BUG_ON(ret);
-
 	io->write.op.end_io = move_write_done;
 
 	if (ctxt->rate)
@@ -391,8 +335,23 @@ err_free_pages:
 err_free:
 	kfree(io);
 err:
-	this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
-	trace_move_extent_alloc_mem_fail2(c, k);
+	if (ret == -BCH_ERR_data_update_done)
+		return 0;
+
+	if (bch2_err_matches(ret, EROFS) ||
+	    bch2_err_matches(ret, BCH_ERR_transaction_restart))
+		return ret;
+
+	this_cpu_inc(c->counters[BCH_COUNTER_move_extent_start_fail]);
+	if (trace_move_extent_start_fail_enabled()) {
+		struct printbuf buf = PRINTBUF;
+
+		bch2_bkey_val_to_text(&buf, c, k);
+		prt_str(&buf, ": ");
+		prt_str(&buf, bch2_err_str(ret));
+		trace_move_extent_start_fail(c, buf.buf);
+		printbuf_exit(&buf);
+	}
 	return ret;
 }
 
@@ -482,37 +441,30 @@ int bch2_move_get_io_opts_one(struct btree_trans *trans,
 int bch2_move_ratelimit(struct moving_context *ctxt)
 {
 	struct bch_fs *c = ctxt->trans->c;
+	bool is_kthread = current->flags & PF_KTHREAD;
 	u64 delay;
 
-	if (ctxt->wait_on_copygc && !c->copygc_running) {
-		bch2_trans_unlock_long(ctxt->trans);
+	if (ctxt->wait_on_copygc && c->copygc_running) {
+		bch2_moving_ctxt_flush_all(ctxt);
 		wait_event_killable(c->copygc_running_wq,
 				    !c->copygc_running ||
-				    kthread_should_stop());
+				    (is_kthread && kthread_should_stop()));
 	}
 
 	do {
 		delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;
 
-
-		if (delay) {
-			if (delay > HZ / 10)
-				bch2_trans_unlock_long(ctxt->trans);
-			else
-				bch2_trans_unlock(ctxt->trans);
-			set_current_state(TASK_INTERRUPTIBLE);
-		}
-
-		if ((current->flags & PF_KTHREAD) && kthread_should_stop()) {
-			__set_current_state(TASK_RUNNING);
+		if (is_kthread && kthread_should_stop())
 			return 1;
-		}
 
 		if (delay)
-			schedule_timeout(delay);
+			move_ctxt_wait_event_timeout(ctxt,
+					freezing(current) ||
+					(is_kthread && kthread_should_stop()),
+					delay);
 
 		if (unlikely(freezing(current))) {
-			move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
+			bch2_moving_ctxt_flush_all(ctxt);
 			try_to_freeze();
 		}
 	} while (delay);
@@ -683,6 +635,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 {
 	struct btree_trans *trans = ctxt->trans;
 	struct bch_fs *c = trans->c;
+	bool is_kthread = current->flags & PF_KTHREAD;
 	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
 	struct btree_iter iter;
 	struct bkey_buf sk;
@@ -728,6 +681,9 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 	}
 
 	while (!(ret = bch2_move_ratelimit(ctxt))) {
+		if (is_kthread && kthread_should_stop())
+			break;
+
 		bch2_trans_begin(trans);
 
 		ret = bch2_get_next_backpointer(trans, bucket, gen,
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 07cf9d42643b..0906aa2d1de2 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -38,6 +38,25 @@ struct moving_context {
 	wait_queue_head_t	wait;
 };
 
+#define move_ctxt_wait_event_timeout(_ctxt, _cond, _timeout)			\
+({										\
+	int _ret = 0;								\
+	while (true) {								\
+		bool cond_finished = false;					\
+		bch2_moving_ctxt_do_pending_writes(_ctxt);			\
+										\
+		if (_cond)							\
+			break;							\
+		bch2_trans_unlock_long((_ctxt)->trans);				\
+		_ret = __wait_event_timeout((_ctxt)->wait,			\
+			     bch2_moving_ctxt_next_pending_write(_ctxt) ||	\
+			     (cond_finished = (_cond)), _timeout);		\
+		if (_ret || ( cond_finished))					\
+			break;							\
+	}									\
+	_ret;									\
+})
+
 #define move_ctxt_wait_event(_ctxt, _cond)				\
 do {									\
 	bool cond_finished = false;					\
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 0a0576326c5b..a84e79f79e5e 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -207,7 +207,7 @@ static int bch2_copygc(struct moving_context *ctxt,
 		goto err;
 
 	darray_for_each(buckets, i) {
-		if (unlikely(freezing(current)))
+		if (kthread_should_stop() || freezing(current))
 			break;
 
 		f = move_bucket_in_flight_add(buckets_in_flight, *i);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 9c30500ce920..770ced1c6285 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -167,6 +167,8 @@ static int bch2_journal_replay(struct bch_fs *c)
 			goto err;
 	}
 
+	BUG_ON(!atomic_read(&keys->ref));
+
 	for (i = 0; i < keys->nr; i++) {
 		k = keys_sorted[i];
 
@@ -188,6 +190,9 @@ static int bch2_journal_replay(struct bch_fs *c)
 		}
 	}
 
+	if (!c->opts.keep_journal)
+		bch2_journal_keys_put_initial(c);
+
 	replay_now_at(j, j->replay_journal_seq_end);
 	j->replay_journal_seq = 0;
 
@@ -909,10 +914,8 @@ out:
 	bch2_flush_fsck_errs(c);
 
 	if (!c->opts.keep_journal &&
-	    test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) {
-		bch2_journal_keys_free(&c->journal_keys);
-		bch2_journal_entries_free(c);
-	}
+	    test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
+		bch2_journal_keys_put_initial(c);
 	kfree(clean);
 
 	if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) {
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 1c3ae13bfced..2008fe8bf706 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -68,6 +68,33 @@ void bch2_replicas_entry_to_text(struct printbuf *out,
 	prt_printf(out, "]");
 }
 
+int bch2_replicas_entry_validate(struct bch_replicas_entry *r,
+				 struct bch_sb *sb,
+				 struct printbuf *err)
+{
+	if (!r->nr_devs) {
+		prt_printf(err, "no devices in entry ");
+		goto bad;
+	}
+
+	if (r->nr_required > 1 &&
+	    r->nr_required >= r->nr_devs) {
+		prt_printf(err, "bad nr_required in entry ");
+		goto bad;
+	}
+
+	for (unsigned i = 0; i < r->nr_devs; i++)
+		if (!bch2_dev_exists(sb, r->devs[i])) {
+			prt_printf(err, "invalid device %u in entry ", r->devs[i]);
+			goto bad;
+		}
+
+	return 0;
+bad:
+	bch2_replicas_entry_to_text(err, r);
+	return -BCH_ERR_invalid_replicas_entry;
+}
+
 void bch2_cpu_replicas_to_text(struct printbuf *out,
 			       struct bch_replicas_cpu *r)
 {
@@ -163,7 +190,8 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry *e,
 }
 
 static struct bch_replicas_cpu
-cpu_replicas_add_entry(struct bch_replicas_cpu *old,
+cpu_replicas_add_entry(struct bch_fs *c,
+		       struct bch_replicas_cpu *old,
 		       struct bch_replicas_entry *new_entry)
 {
 	unsigned i;
@@ -173,6 +201,9 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
 					replicas_entry_bytes(new_entry)),
 	};
 
+	for (i = 0; i < new_entry->nr_devs; i++)
+		BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i]));
+
 	BUG_ON(!new_entry->data_type);
 	verify_replicas_entry(new_entry);
 
@@ -382,7 +413,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 
 	if (c->replicas_gc.entries &&
 	    !__replicas_has_entry(&c->replicas_gc, new_entry)) {
-		new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry);
+		new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
 		if (!new_gc.entries) {
 			ret = -BCH_ERR_ENOMEM_cpu_replicas;
 			goto err;
@@ -390,7 +421,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 	}
 
 	if (!__replicas_has_entry(&c->replicas, new_entry)) {
-		new_r = cpu_replicas_add_entry(&c->replicas, new_entry);
+		new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
 		if (!new_r.entries) {
 			ret = -BCH_ERR_ENOMEM_cpu_replicas;
 			goto err;
@@ -598,7 +629,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
 	if (idx < 0) {
 		struct bch_replicas_cpu n;
 
-		n = cpu_replicas_add_entry(&c->replicas, r);
+		n = cpu_replicas_add_entry(c, &c->replicas, r);
 		if (!n.entries)
 			return -BCH_ERR_ENOMEM_cpu_replicas;
 
@@ -797,7 +828,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
 				      struct bch_sb *sb,
 				      struct printbuf *err)
 {
-	unsigned i, j;
+	unsigned i;
 
 	sort_cmp_size(cpu_r->entries,
 		      cpu_r->nr,
@@ -808,31 +839,9 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
 		struct bch_replicas_entry *e =
 			cpu_replicas_entry(cpu_r, i);
 
-		if (e->data_type >= BCH_DATA_NR) {
-			prt_printf(err, "invalid data type in entry ");
-			bch2_replicas_entry_to_text(err, e);
-			return -BCH_ERR_invalid_sb_replicas;
-		}
-
-		if (!e->nr_devs) {
-			prt_printf(err, "no devices in entry ");
-			bch2_replicas_entry_to_text(err, e);
-			return -BCH_ERR_invalid_sb_replicas;
-		}
-
-		if (e->nr_required > 1 &&
-		    e->nr_required >= e->nr_devs) {
-			prt_printf(err, "bad nr_required in entry ");
-			bch2_replicas_entry_to_text(err, e);
-			return -BCH_ERR_invalid_sb_replicas;
-		}
-
-		for (j = 0; j < e->nr_devs; j++)
-			if (!bch2_dev_exists(sb, e->devs[j])) {
-				prt_printf(err, "invalid device %u in entry ", e->devs[j]);
-				bch2_replicas_entry_to_text(err, e);
-				return -BCH_ERR_invalid_sb_replicas;
-			}
+		int ret = bch2_replicas_entry_validate(e, sb, err);
+		if (ret)
+			return ret;
 
 		if (i + 1 < cpu_r->nr) {
 			struct bch_replicas_entry *n =
diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
index 4887675a86f0..f70a642775d1 100644
--- a/fs/bcachefs/replicas.h
+++ b/fs/bcachefs/replicas.h
@@ -9,6 +9,8 @@
 void bch2_replicas_entry_sort(struct bch_replicas_entry *);
 void bch2_replicas_entry_to_text(struct printbuf *,
 				 struct bch_replicas_entry *);
+int bch2_replicas_entry_validate(struct bch_replicas_entry *,
+				 struct bch_sb *, struct printbuf *);
 void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
 
 static inline struct bch_replicas_entry *
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index e9af77b384c7..5dac038f0851 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -959,7 +959,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
 					parent_id, id))
 			goto err;
 
-		parent->v.children[i] = le32_to_cpu(child_id);
+		parent->v.children[i] = cpu_to_le32(child_id);
 
 		normalize_snapshot_child_pointers(&parent->v);
 	}
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index f4cad903f4d6..f3e12f7979d5 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -166,6 +166,7 @@ void bch2_free_super(struct bch_sb_handle *sb)
 	if (!IS_ERR_OR_NULL(sb->bdev))
 		blkdev_put(sb->bdev, sb->holder);
 	kfree(sb->holder);
+	kfree(sb->sb_name);
 
 	kfree(sb->sb);
 	memset(sb, 0, sizeof(*sb));
@@ -675,6 +676,10 @@ retry:
 	if (!sb->holder)
 		return -ENOMEM;
 
+	sb->sb_name = kstrdup(path, GFP_KERNEL);
+	if (!sb->sb_name)
+		return -ENOMEM;
+
 #ifndef __KERNEL__
 	if (opt_get(*opts, direct_io) == false)
 		sb->mode |= BLK_OPEN_BUFFERED;
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 24672bb31cbe..f63474c5c5a2 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -423,6 +423,18 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
 		bch2_dev_allocator_add(c, ca);
 	bch2_recalc_capacity(c);
 
+	set_bit(BCH_FS_RW, &c->flags);
+	set_bit(BCH_FS_WAS_RW, &c->flags);
+
+#ifndef BCH_WRITE_REF_DEBUG
+	percpu_ref_reinit(&c->writes);
+#else
+	for (i = 0; i < BCH_WRITE_REF_NR; i++) {
+		BUG_ON(atomic_long_read(&c->writes[i]));
+		atomic_long_inc(&c->writes[i]);
+	}
+#endif
+
 	ret = bch2_gc_thread_start(c);
 	if (ret) {
 		bch_err(c, "error starting gc thread");
@@ -439,24 +451,16 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
 			goto err;
 	}
 
-#ifndef BCH_WRITE_REF_DEBUG
-	percpu_ref_reinit(&c->writes);
-#else
-	for (i = 0; i < BCH_WRITE_REF_NR; i++) {
-		BUG_ON(atomic_long_read(&c->writes[i]));
-		atomic_long_inc(&c->writes[i]);
-	}
-#endif
-	set_bit(BCH_FS_RW, &c->flags);
-	set_bit(BCH_FS_WAS_RW, &c->flags);
-
 	bch2_do_discards(c);
 	bch2_do_invalidates(c);
 	bch2_do_stripe_deletes(c);
 	bch2_do_pending_node_rewrites(c);
 	return 0;
 err:
-	__bch2_fs_read_only(c);
+	if (test_bit(BCH_FS_RW, &c->flags))
+		bch2_fs_read_only(c);
+	else
+		__bch2_fs_read_only(c);
 	return ret;
 }
 
@@ -504,8 +508,8 @@ static void __bch2_fs_free(struct bch_fs *c)
 	bch2_io_clock_exit(&c->io_clock[WRITE]);
 	bch2_io_clock_exit(&c->io_clock[READ]);
 	bch2_fs_compress_exit(c);
-	bch2_journal_keys_free(&c->journal_keys);
-	bch2_journal_entries_free(c);
+	bch2_journal_keys_put_initial(c);
+	BUG_ON(atomic_read(&c->journal_keys.ref));
 	bch2_fs_btree_write_buffer_exit(c);
 	percpu_free_rwsem(&c->mark_lock);
 	free_percpu(c->online_reserved);
@@ -702,6 +706,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 
 	init_rwsem(&c->gc_lock);
 	mutex_init(&c->gc_gens_lock);
+	atomic_set(&c->journal_keys.ref, 1);
+	c->journal_keys.initial_ref_held = true;
 
 	for (i = 0; i < BCH_TIME_STAT_NR; i++)
 		bch2_time_stats_init(&c->times[i]);
diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h
index 7dda4985b99f..9c1fd4ca2b10 100644
--- a/fs/bcachefs/super_types.h
+++ b/fs/bcachefs/super_types.h
@@ -5,6 +5,7 @@
 struct bch_sb_handle {
 	struct bch_sb		*sb;
 	struct block_device	*bdev;
+	char			*sb_name;
 	struct bio		*bio;
 	void			*holder;
 	size_t			buffer_size;
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 7857671159b4..fd49b63562c3 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -754,9 +754,9 @@ TRACE_EVENT(move_extent_fail,
 	TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
 );
 
-DEFINE_EVENT(bkey, move_extent_alloc_mem_fail,
-	TP_PROTO(struct bch_fs *c, const char *k),
-	TP_ARGS(c, k)
+DEFINE_EVENT(bkey, move_extent_start_fail,
+	TP_PROTO(struct bch_fs *c, const char *str),
+	TP_ARGS(c, str)
 );
 
 TRACE_EVENT(move_data,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 401ea09ae4b8..bbcc3df77646 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3213,6 +3213,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 		goto fail_alloc;
 	}
 
+	btrfs_info(fs_info, "first mount of filesystem %pU", disk_super->fsid);
 	/*
 	 * Verify the type first, if that or the checksum value are
 	 * corrupted, we'll find out
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 03cef28d9e37..e6230a6ffa98 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -674,8 +674,8 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
  * 		the array will be skipped
  *
  * Return: 0        if all pages were able to be allocated;
- *         -ENOMEM  otherwise, and the caller is responsible for freeing all
- *                  non-null page pointers in the array.
+ *         -ENOMEM  otherwise, the partially allocated pages would be freed and
+ *                  the array slots zeroed
  */
 int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array)
 {
@@ -694,8 +694,13 @@ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array)
 		 * though alloc_pages_bulk_array() falls back to alloc_page()
 		 * if  it could not bulk-allocate. So we must be out of memory.
 		 */
-		if (allocated == last)
+		if (allocated == last) {
+			for (int i = 0; i < allocated; i++) {
+				__free_page(page_array[i]);
+				page_array[i] = NULL;
+			}
 			return -ENOMEM;
+		}
 
 		memalloc_retry_wait(GFP_NOFS);
 	}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index dfe257e1845b..4e50b62db2a8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4356,6 +4356,7 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat
 		arg->clone_sources = compat_ptr(args32.clone_sources);
 		arg->parent_root = args32.parent_root;
 		arg->flags = args32.flags;
+		arg->version = args32.version;
 		memcpy(arg->reserved, args32.reserved,
 		       sizeof(args32.reserved));
 #else
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 1f62976bee82..6486f0d7e993 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -794,6 +794,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
 			dump_ref_action(fs_info, ra);
 			kfree(ref);
 			kfree(ra);
+			kfree(re);
 			goto out_unlock;
 		} else if (be->num_refs == 0) {
 			btrfs_err(fs_info,
@@ -803,6 +804,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
 			dump_ref_action(fs_info, ra);
 			kfree(ref);
 			kfree(ra);
+			kfree(re);
 			goto out_unlock;
 		}
 
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 3b929f0e8f04..4e36550618e5 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -8158,7 +8158,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
 	}
 
 	sctx->send_filp = fget(arg->send_fd);
-	if (!sctx->send_filp) {
+	if (!sctx->send_filp || !(sctx->send_filp->f_mode & FMODE_WRITE)) {
 		ret = -EBADF;
 		goto out;
 	}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f638dc339693..ef256b944c72 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -80,7 +80,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data);
 
 static void btrfs_put_super(struct super_block *sb)
 {
-	close_ctree(btrfs_sb(sb));
+	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+
+	btrfs_info(fs_info, "last unmount of filesystem %pU", fs_info->fs_devices->fsid);
+	close_ctree(fs_info);
 }
 
 enum {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 6e63816dddcb..bfc0eb5e3b7c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1774,7 +1774,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	btrfs_release_path(path);
 
 	ret = btrfs_create_qgroup(trans, objectid);
-	if (ret) {
+	if (ret && ret != -EEXIST) {
 		btrfs_abort_transaction(trans, ret);
 		goto fail;
 	}
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index a416cbea75d1..50fdc69fdddf 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -31,6 +31,7 @@
 #include "inode-item.h"
 #include "dir-item.h"
 #include "raid-stripe-tree.h"
+#include "extent-tree.h"
 
 /*
  * Error message should follow the following format:
@@ -1276,6 +1277,8 @@ static int check_extent_item(struct extent_buffer *leaf,
 	unsigned long ptr;	/* Current pointer inside inline refs */
 	unsigned long end;	/* Extent item end */
 	const u32 item_size = btrfs_item_size(leaf, slot);
+	u8 last_type = 0;
+	u64 last_seq = U64_MAX;
 	u64 flags;
 	u64 generation;
 	u64 total_refs;		/* Total refs in btrfs_extent_item */
@@ -1322,6 +1325,18 @@ static int check_extent_item(struct extent_buffer *leaf,
 	 *    2.2) Ref type specific data
 	 *         Either using btrfs_extent_inline_ref::offset, or specific
 	 *         data structure.
+	 *
+	 *    All above inline items should follow the order:
+	 *
+	 *    - All btrfs_extent_inline_ref::type should be in an ascending
+	 *      order
+	 *
+	 *    - Within the same type, the items should follow a descending
+	 *      order by their sequence number. The sequence number is
+	 *      determined by:
+	 *      * btrfs_extent_inline_ref::offset for all types  other than
+	 *        EXTENT_DATA_REF
+	 *      * hash_extent_data_ref() for EXTENT_DATA_REF
 	 */
 	if (unlikely(item_size < sizeof(*ei))) {
 		extent_err(leaf, slot,
@@ -1403,6 +1418,7 @@ static int check_extent_item(struct extent_buffer *leaf,
 		struct btrfs_extent_inline_ref *iref;
 		struct btrfs_extent_data_ref *dref;
 		struct btrfs_shared_data_ref *sref;
+		u64 seq;
 		u64 dref_offset;
 		u64 inline_offset;
 		u8 inline_type;
@@ -1416,6 +1432,7 @@ static int check_extent_item(struct extent_buffer *leaf,
 		iref = (struct btrfs_extent_inline_ref *)ptr;
 		inline_type = btrfs_extent_inline_ref_type(leaf, iref);
 		inline_offset = btrfs_extent_inline_ref_offset(leaf, iref);
+		seq = inline_offset;
 		if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) {
 			extent_err(leaf, slot,
 "inline ref item overflows extent item, ptr %lu iref size %u end %lu",
@@ -1446,6 +1463,10 @@ static int check_extent_item(struct extent_buffer *leaf,
 		case BTRFS_EXTENT_DATA_REF_KEY:
 			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
 			dref_offset = btrfs_extent_data_ref_offset(leaf, dref);
+			seq = hash_extent_data_ref(
+					btrfs_extent_data_ref_root(leaf, dref),
+					btrfs_extent_data_ref_objectid(leaf, dref),
+					btrfs_extent_data_ref_offset(leaf, dref));
 			if (unlikely(!IS_ALIGNED(dref_offset,
 						 fs_info->sectorsize))) {
 				extent_err(leaf, slot,
@@ -1475,6 +1496,24 @@ static int check_extent_item(struct extent_buffer *leaf,
 				   inline_type);
 			return -EUCLEAN;
 		}
+		if (inline_type < last_type) {
+			extent_err(leaf, slot,
+				   "inline ref out-of-order: has type %u, prev type %u",
+				   inline_type, last_type);
+			return -EUCLEAN;
+		}
+		/* Type changed, allow the sequence starts from U64_MAX again. */
+		if (inline_type > last_type)
+			last_seq = U64_MAX;
+		if (seq > last_seq) {
+			extent_err(leaf, slot,
+"inline ref out-of-order: has type %u offset %llu seq 0x%llx, prev type %u seq 0x%llx",
+				   inline_type, inline_offset, seq,
+				   last_type, last_seq);
+			return -EUCLEAN;
+		}
+		last_type = inline_type;
+		last_seq = seq;
 		ptr += btrfs_extent_inline_ref_size(inline_type);
 	}
 	/* No padding is allowed */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c6f16625af51..f627674b37db 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3006,15 +3006,16 @@ struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
 	read_unlock(&em_tree->lock);
 
 	if (!em) {
-		btrfs_crit(fs_info, "unable to find logical %llu length %llu",
+		btrfs_crit(fs_info,
+			   "unable to find chunk map for logical %llu length %llu",
 			   logical, length);
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (em->start > logical || em->start + em->len < logical) {
+	if (em->start > logical || em->start + em->len <= logical) {
 		btrfs_crit(fs_info,
-			   "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
-			   logical, length, em->start, em->start + em->len);
+			   "found a bad chunk map, wanted %llu-%llu, found %llu-%llu",
+			   logical, logical + length, em->start, em->start + em->len);
 		free_extent_map(em);
 		return ERR_PTR(-EINVAL);
 	}
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index c45e8c2d62e1..5063434be0fc 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -84,6 +84,14 @@ int debugfs_file_get(struct dentry *dentry)
 	struct debugfs_fsdata *fsd;
 	void *d_fsd;
 
+	/*
+	 * This could only happen if some debugfs user erroneously calls
+	 * debugfs_file_get() on a dentry that isn't even a file, let
+	 * them know about it.
+	 */
+	if (WARN_ON(!d_is_reg(dentry)))
+		return -EINVAL;
+
 	d_fsd = READ_ONCE(dentry->d_fsdata);
 	if (!((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) {
 		fsd = d_fsd;
@@ -100,6 +108,8 @@ int debugfs_file_get(struct dentry *dentry)
 			kfree(fsd);
 			fsd = READ_ONCE(dentry->d_fsdata);
 		}
+		INIT_LIST_HEAD(&fsd->cancellations);
+		mutex_init(&fsd->cancellations_mtx);
 	}
 
 	/*
@@ -138,6 +148,86 @@ void debugfs_file_put(struct dentry *dentry)
 }
 EXPORT_SYMBOL_GPL(debugfs_file_put);
 
+/**
+ * debugfs_enter_cancellation - enter a debugfs cancellation
+ * @file: the file being accessed
+ * @cancellation: the cancellation object, the cancel callback
+ *	inside of it must be initialized
+ *
+ * When a debugfs file is removed it needs to wait for all active
+ * operations to complete. However, the operation itself may need
+ * to wait for hardware or completion of some asynchronous process
+ * or similar. As such, it may need to be cancelled to avoid long
+ * waits or even deadlocks.
+ *
+ * This function can be used inside a debugfs handler that may
+ * need to be cancelled. As soon as this function is called, the
+ * cancellation's 'cancel' callback may be called, at which point
+ * the caller should proceed to call debugfs_leave_cancellation()
+ * and leave the debugfs handler function as soon as possible.
+ * Note that the 'cancel' callback is only ever called in the
+ * context of some kind of debugfs_remove().
+ *
+ * This function must be paired with debugfs_leave_cancellation().
+ */
+void debugfs_enter_cancellation(struct file *file,
+				struct debugfs_cancellation *cancellation)
+{
+	struct debugfs_fsdata *fsd;
+	struct dentry *dentry = F_DENTRY(file);
+
+	INIT_LIST_HEAD(&cancellation->list);
+
+	if (WARN_ON(!d_is_reg(dentry)))
+		return;
+
+	if (WARN_ON(!cancellation->cancel))
+		return;
+
+	fsd = READ_ONCE(dentry->d_fsdata);
+	if (WARN_ON(!fsd ||
+		    ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)))
+		return;
+
+	mutex_lock(&fsd->cancellations_mtx);
+	list_add(&cancellation->list, &fsd->cancellations);
+	mutex_unlock(&fsd->cancellations_mtx);
+
+	/* if we're already removing wake it up to cancel */
+	if (d_unlinked(dentry))
+		complete(&fsd->active_users_drained);
+}
+EXPORT_SYMBOL_GPL(debugfs_enter_cancellation);
+
+/**
+ * debugfs_leave_cancellation - leave cancellation section
+ * @file: the file being accessed
+ * @cancellation: the cancellation previously registered with
+ *	debugfs_enter_cancellation()
+ *
+ * See the documentation of debugfs_enter_cancellation().
+ */
+void debugfs_leave_cancellation(struct file *file,
+				struct debugfs_cancellation *cancellation)
+{
+	struct debugfs_fsdata *fsd;
+	struct dentry *dentry = F_DENTRY(file);
+
+	if (WARN_ON(!d_is_reg(dentry)))
+		return;
+
+	fsd = READ_ONCE(dentry->d_fsdata);
+	if (WARN_ON(!fsd ||
+		    ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)))
+		return;
+
+	mutex_lock(&fsd->cancellations_mtx);
+	if (!list_empty(&cancellation->list))
+		list_del(&cancellation->list);
+	mutex_unlock(&fsd->cancellations_mtx);
+}
+EXPORT_SYMBOL_GPL(debugfs_leave_cancellation);
+
 /*
  * Only permit access to world-readable files when the kernel is locked down.
  * We also need to exclude any file that has ways to write or alter it as root
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 5d41765e0c77..034a617cb1a5 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -236,17 +236,25 @@ static const struct super_operations debugfs_super_operations = {
 
 static void debugfs_release_dentry(struct dentry *dentry)
 {
-	void *fsd = dentry->d_fsdata;
+	struct debugfs_fsdata *fsd = dentry->d_fsdata;
 
-	if (!((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))
-		kfree(dentry->d_fsdata);
+	if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
+		return;
+
+	/* check it wasn't a dir (no fsdata) or automount (no real_fops) */
+	if (fsd && fsd->real_fops) {
+		WARN_ON(!list_empty(&fsd->cancellations));
+		mutex_destroy(&fsd->cancellations_mtx);
+	}
+
+	kfree(fsd);
 }
 
 static struct vfsmount *debugfs_automount(struct path *path)
 {
-	debugfs_automount_t f;
-	f = (debugfs_automount_t)path->dentry->d_fsdata;
-	return f(path->dentry, d_inode(path->dentry)->i_private);
+	struct debugfs_fsdata *fsd = path->dentry->d_fsdata;
+
+	return fsd->automount(path->dentry, d_inode(path->dentry)->i_private);
 }
 
 static const struct dentry_operations debugfs_dops = {
@@ -634,13 +642,23 @@ struct dentry *debugfs_create_automount(const char *name,
 					void *data)
 {
 	struct dentry *dentry = start_creating(name, parent);
+	struct debugfs_fsdata *fsd;
 	struct inode *inode;
 
 	if (IS_ERR(dentry))
 		return dentry;
 
+	fsd = kzalloc(sizeof(*fsd), GFP_KERNEL);
+	if (!fsd) {
+		failed_creating(dentry);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fsd->automount = f;
+
 	if (!(debugfs_allow & DEBUGFS_ALLOW_API)) {
 		failed_creating(dentry);
+		kfree(fsd);
 		return ERR_PTR(-EPERM);
 	}
 
@@ -648,13 +666,14 @@ struct dentry *debugfs_create_automount(const char *name,
 	if (unlikely(!inode)) {
 		pr_err("out of free dentries, can not create automount '%s'\n",
 		       name);
+		kfree(fsd);
 		return failed_creating(dentry);
 	}
 
 	make_empty_dir_inode(inode);
 	inode->i_flags |= S_AUTOMOUNT;
 	inode->i_private = data;
-	dentry->d_fsdata = (void *)f;
+	dentry->d_fsdata = fsd;
 	/* directory inodes start off with i_nlink == 2 (for "." entry) */
 	inc_nlink(inode);
 	d_instantiate(dentry, inode);
@@ -731,8 +750,37 @@ static void __debugfs_file_removed(struct dentry *dentry)
 	fsd = READ_ONCE(dentry->d_fsdata);
 	if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
 		return;
-	if (!refcount_dec_and_test(&fsd->active_users))
+
+	/* if we hit zero, just wait for all to finish */
+	if (!refcount_dec_and_test(&fsd->active_users)) {
 		wait_for_completion(&fsd->active_users_drained);
+		return;
+	}
+
+	/* if we didn't hit zero, try to cancel any we can */
+	while (refcount_read(&fsd->active_users)) {
+		struct debugfs_cancellation *c;
+
+		/*
+		 * Lock the cancellations. Note that the cancellations
+		 * structs are meant to be on the stack, so we need to
+		 * ensure we either use them here or don't touch them,
+		 * and debugfs_leave_cancellation() will wait for this
+		 * to be finished processing before exiting one. It may
+		 * of course win and remove the cancellation, but then
+		 * chances are we never even got into this bit, we only
+		 * do if the refcount isn't zero already.
+		 */
+		mutex_lock(&fsd->cancellations_mtx);
+		while ((c = list_first_entry_or_null(&fsd->cancellations,
+						     typeof(*c), list))) {
+			list_del_init(&c->list);
+			c->cancel(dentry, c->cancel_data);
+		}
+		mutex_unlock(&fsd->cancellations_mtx);
+
+		wait_for_completion(&fsd->active_users_drained);
+	}
 }
 
 static void remove_one(struct dentry *victim)
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
index 92af8ae31313..dae80c2a469e 100644
--- a/fs/debugfs/internal.h
+++ b/fs/debugfs/internal.h
@@ -7,6 +7,7 @@
 
 #ifndef _DEBUGFS_INTERNAL_H_
 #define _DEBUGFS_INTERNAL_H_
+#include <linux/list.h>
 
 struct file_operations;
 
@@ -17,8 +18,18 @@ extern const struct file_operations debugfs_full_proxy_file_operations;
 
 struct debugfs_fsdata {
 	const struct file_operations *real_fops;
-	refcount_t active_users;
-	struct completion active_users_drained;
+	union {
+		/* automount_fn is used when real_fops is NULL */
+		debugfs_automount_t automount;
+		struct {
+			refcount_t active_users;
+			struct completion active_users_drained;
+
+			/* protect cancellations */
+			struct mutex cancellations_mtx;
+			struct list_head cancellations;
+		};
+	};
 };
 
 /*
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 1039e5bf90af..4ddc36f4dbd4 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -258,7 +258,6 @@ static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 			goto out_unlock;
 		}
 
-		iocb->ki_pos += status;
 		ret += status;
 		endbyte = pos + status - 1;
 		ret2 = filemap_write_and_wait_range(inode->i_mapping, pos,
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 2c6078a6b8ec..58ca7c936393 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -501,15 +501,38 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
 
 	down_write(&NILFS_MDT(sufile)->mi_sem);
 	ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
-	if (!ret) {
-		mark_buffer_dirty(bh);
-		nilfs_mdt_mark_dirty(sufile);
-		kaddr = kmap_atomic(bh->b_page);
-		su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+	if (ret)
+		goto out_sem;
+
+	kaddr = kmap_atomic(bh->b_page);
+	su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+	if (unlikely(nilfs_segment_usage_error(su))) {
+		struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+
+		kunmap_atomic(kaddr);
+		brelse(bh);
+		if (nilfs_segment_is_active(nilfs, segnum)) {
+			nilfs_error(sufile->i_sb,
+				    "active segment %llu is erroneous",
+				    (unsigned long long)segnum);
+		} else {
+			/*
+			 * Segments marked erroneous are never allocated by
+			 * nilfs_sufile_alloc(); only active segments, ie,
+			 * the segments indexed by ns_segnum or ns_nextnum,
+			 * can be erroneous here.
+			 */
+			WARN_ON_ONCE(1);
+		}
+		ret = -EIO;
+	} else {
 		nilfs_segment_usage_set_dirty(su);
 		kunmap_atomic(kaddr);
+		mark_buffer_dirty(bh);
+		nilfs_mdt_mark_dirty(sufile);
 		brelse(bh);
 	}
+out_sem:
 	up_write(&NILFS_MDT(sufile)->mi_sem);
 	return ret;
 }
@@ -536,9 +559,14 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
 
 	kaddr = kmap_atomic(bh->b_page);
 	su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
-	WARN_ON(nilfs_segment_usage_error(su));
-	if (modtime)
+	if (modtime) {
+		/*
+		 * Check segusage error and set su_lastmod only when updating
+		 * this entry with a valid timestamp, not for cancellation.
+		 */
+		WARN_ON_ONCE(nilfs_segment_usage_error(su));
 		su->su_lastmod = cpu_to_le64(modtime);
+	}
 	su->su_nblocks = cpu_to_le32(nblocks);
 	kunmap_atomic(kaddr);
 
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 0f0667957c81..71400496ed36 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -716,7 +716,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
 			goto failed_sbh;
 		}
 		nilfs_release_super_block(nilfs);
-		sb_set_blocksize(sb, blocksize);
+		if (!sb_set_blocksize(sb, blocksize)) {
+			nilfs_err(sb, "bad blocksize %d", blocksize);
+			err = -EINVAL;
+			goto out;
+		}
 
 		err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp);
 		if (err)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ef2eb12906da..435b61054b5b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1982,15 +1982,31 @@ static int pagemap_scan_test_walk(unsigned long start, unsigned long end,
 	struct pagemap_scan_private *p = walk->private;
 	struct vm_area_struct *vma = walk->vma;
 	unsigned long vma_category = 0;
+	bool wp_allowed = userfaultfd_wp_async(vma) &&
+	    userfaultfd_wp_use_markers(vma);
 
-	if (userfaultfd_wp_async(vma) && userfaultfd_wp_use_markers(vma))
-		vma_category |= PAGE_IS_WPALLOWED;
-	else if (p->arg.flags & PM_SCAN_CHECK_WPASYNC)
-		return -EPERM;
+	if (!wp_allowed) {
+		/* User requested explicit failure over wp-async capability */
+		if (p->arg.flags & PM_SCAN_CHECK_WPASYNC)
+			return -EPERM;
+		/*
+		 * User requires wr-protect, and allows silently skipping
+		 * unsupported vmas.
+		 */
+		if (p->arg.flags & PM_SCAN_WP_MATCHING)
+			return 1;
+		/*
+		 * Then the request doesn't involve wr-protects at all,
+		 * fall through to the rest checks, and allow vma walk.
+		 */
+	}
 
 	if (vma->vm_flags & VM_PFNMAP)
 		return 1;
 
+	if (wp_allowed)
+		vma_category |= PAGE_IS_WPALLOWED;
+
 	if (!pagemap_scan_is_interesting_vma(vma_category, p))
 		return 1;
 
@@ -2140,7 +2156,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
 		return 0;
 	}
 
-	if (!p->vec_out) {
+	if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) {
 		/* Fast path for performing exclusive WP */
 		for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
 			if (pte_uffd_wp(ptep_get(pte)))
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index ea3a7a668b45..2131638f26d0 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -1196,32 +1196,103 @@ const struct inode_operations cifs_symlink_inode_ops = {
 	.listxattr = cifs_listxattr,
 };
 
+/*
+ * Advance the EOF marker to after the source range.
+ */
+static int cifs_precopy_set_eof(struct inode *src_inode, struct cifsInodeInfo *src_cifsi,
+				struct cifs_tcon *src_tcon,
+				unsigned int xid, loff_t src_end)
+{
+	struct cifsFileInfo *writeable_srcfile;
+	int rc = -EINVAL;
+
+	writeable_srcfile = find_writable_file(src_cifsi, FIND_WR_FSUID_ONLY);
+	if (writeable_srcfile) {
+		if (src_tcon->ses->server->ops->set_file_size)
+			rc = src_tcon->ses->server->ops->set_file_size(
+				xid, src_tcon, writeable_srcfile,
+				src_inode->i_size, true /* no need to set sparse */);
+		else
+			rc = -ENOSYS;
+		cifsFileInfo_put(writeable_srcfile);
+		cifs_dbg(FYI, "SetFSize for copychunk rc = %d\n", rc);
+	}
+
+	if (rc < 0)
+		goto set_failed;
+
+	netfs_resize_file(&src_cifsi->netfs, src_end);
+	fscache_resize_cookie(cifs_inode_cookie(src_inode), src_end);
+	return 0;
+
+set_failed:
+	return filemap_write_and_wait(src_inode->i_mapping);
+}
+
+/*
+ * Flush out either the folio that overlaps the beginning of a range in which
+ * pos resides or the folio that overlaps the end of a range unless that folio
+ * is entirely within the range we're going to invalidate.  We extend the flush
+ * bounds to encompass the folio.
+ */
+static int cifs_flush_folio(struct inode *inode, loff_t pos, loff_t *_fstart, loff_t *_fend,
+			    bool first)
+{
+	struct folio *folio;
+	unsigned long long fpos, fend;
+	pgoff_t index = pos / PAGE_SIZE;
+	size_t size;
+	int rc = 0;
+
+	folio = filemap_get_folio(inode->i_mapping, index);
+	if (IS_ERR(folio))
+		return 0;
+
+	size = folio_size(folio);
+	fpos = folio_pos(folio);
+	fend = fpos + size - 1;
+	*_fstart = min_t(unsigned long long, *_fstart, fpos);
+	*_fend   = max_t(unsigned long long, *_fend, fend);
+	if ((first && pos == fpos) || (!first && pos == fend))
+		goto out;
+
+	rc = filemap_write_and_wait_range(inode->i_mapping, fpos, fend);
+out:
+	folio_put(folio);
+	return rc;
+}
+
 static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
 		struct file *dst_file, loff_t destoff, loff_t len,
 		unsigned int remap_flags)
 {
 	struct inode *src_inode = file_inode(src_file);
 	struct inode *target_inode = file_inode(dst_file);
+	struct cifsInodeInfo *src_cifsi = CIFS_I(src_inode);
+	struct cifsInodeInfo *target_cifsi = CIFS_I(target_inode);
 	struct cifsFileInfo *smb_file_src = src_file->private_data;
-	struct cifsFileInfo *smb_file_target;
-	struct cifs_tcon *target_tcon;
+	struct cifsFileInfo *smb_file_target = dst_file->private_data;
+	struct cifs_tcon *target_tcon, *src_tcon;
+	unsigned long long destend, fstart, fend, new_size;
 	unsigned int xid;
 	int rc;
 
-	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+	if (remap_flags & REMAP_FILE_DEDUP)
+		return -EOPNOTSUPP;
+	if (remap_flags & ~REMAP_FILE_ADVISORY)
 		return -EINVAL;
 
 	cifs_dbg(FYI, "clone range\n");
 
 	xid = get_xid();
 
-	if (!src_file->private_data || !dst_file->private_data) {
+	if (!smb_file_src || !smb_file_target) {
 		rc = -EBADF;
 		cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
 		goto out;
 	}
 
-	smb_file_target = dst_file->private_data;
+	src_tcon = tlink_tcon(smb_file_src->tlink);
 	target_tcon = tlink_tcon(smb_file_target->tlink);
 
 	/*
@@ -1234,20 +1305,63 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
 	if (len == 0)
 		len = src_inode->i_size - off;
 
-	cifs_dbg(FYI, "about to flush pages\n");
-	/* should we flush first and last page first */
-	truncate_inode_pages_range(&target_inode->i_data, destoff,
-				   PAGE_ALIGN(destoff + len)-1);
+	cifs_dbg(FYI, "clone range\n");
 
-	if (target_tcon->ses->server->ops->duplicate_extents)
+	/* Flush the source buffer */
+	rc = filemap_write_and_wait_range(src_inode->i_mapping, off,
+					  off + len - 1);
+	if (rc)
+		goto unlock;
+
+	/* The server-side copy will fail if the source crosses the EOF marker.
+	 * Advance the EOF marker after the flush above to the end of the range
+	 * if it's short of that.
+	 */
+	if (src_cifsi->netfs.remote_i_size < off + len) {
+		rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len);
+		if (rc < 0)
+			goto unlock;
+	}
+
+	new_size = destoff + len;
+	destend = destoff + len - 1;
+
+	/* Flush the folios at either end of the destination range to prevent
+	 * accidental loss of dirty data outside of the range.
+	 */
+	fstart = destoff;
+	fend = destend;
+
+	rc = cifs_flush_folio(target_inode, destoff, &fstart, &fend, true);
+	if (rc)
+		goto unlock;
+	rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false);
+	if (rc)
+		goto unlock;
+
+	/* Discard all the folios that overlap the destination region. */
+	cifs_dbg(FYI, "about to discard pages %llx-%llx\n", fstart, fend);
+	truncate_inode_pages_range(&target_inode->i_data, fstart, fend);
+
+	fscache_invalidate(cifs_inode_cookie(target_inode), NULL,
+			   i_size_read(target_inode), 0);
+
+	rc = -EOPNOTSUPP;
+	if (target_tcon->ses->server->ops->duplicate_extents) {
 		rc = target_tcon->ses->server->ops->duplicate_extents(xid,
 			smb_file_src, smb_file_target, off, len, destoff);
-	else
-		rc = -EOPNOTSUPP;
+		if (rc == 0 && new_size > i_size_read(target_inode)) {
+			truncate_setsize(target_inode, new_size);
+			netfs_resize_file(&target_cifsi->netfs, new_size);
+			fscache_resize_cookie(cifs_inode_cookie(target_inode),
+					      new_size);
+		}
+	}
 
 	/* force revalidate of size and timestamps of target file now
 	   that target is updated on the server */
 	CIFS_I(target_inode)->time = 0;
+unlock:
 	/* although unlocking in the reverse order from locking is not
 	   strictly necessary here it is a little cleaner to be consistent */
 	unlock_two_nondirectories(src_inode, target_inode);
@@ -1263,10 +1377,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
 {
 	struct inode *src_inode = file_inode(src_file);
 	struct inode *target_inode = file_inode(dst_file);
+	struct cifsInodeInfo *src_cifsi = CIFS_I(src_inode);
 	struct cifsFileInfo *smb_file_src;
 	struct cifsFileInfo *smb_file_target;
 	struct cifs_tcon *src_tcon;
 	struct cifs_tcon *target_tcon;
+	unsigned long long destend, fstart, fend;
 	ssize_t rc;
 
 	cifs_dbg(FYI, "copychunk range\n");
@@ -1306,13 +1422,41 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
 	if (rc)
 		goto unlock;
 
-	/* should we flush first and last page first */
-	truncate_inode_pages(&target_inode->i_data, 0);
+	/* The server-side copy will fail if the source crosses the EOF marker.
+	 * Advance the EOF marker after the flush above to the end of the range
+	 * if it's short of that.
+	 */
+	if (src_cifsi->server_eof < off + len) {
+		rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len);
+		if (rc < 0)
+			goto unlock;
+	}
+
+	destend = destoff + len - 1;
+
+	/* Flush the folios at either end of the destination range to prevent
+	 * accidental loss of dirty data outside of the range.
+	 */
+	fstart = destoff;
+	fend = destend;
+
+	rc = cifs_flush_folio(target_inode, destoff, &fstart, &fend, true);
+	if (rc)
+		goto unlock;
+	rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false);
+	if (rc)
+		goto unlock;
+
+	/* Discard all the folios that overlap the destination region. */
+	truncate_inode_pages_range(&target_inode->i_data, fstart, fend);
 
 	rc = file_modified(dst_file);
-	if (!rc)
+	if (!rc) {
 		rc = target_tcon->ses->server->ops->copychunk_range(xid,
 			smb_file_src, smb_file_target, off, len, destoff);
+		if (rc > 0 && destoff + rc > i_size_read(target_inode))
+			truncate_setsize(target_inode, destoff + rc);
+	}
 
 	file_accessed(src_file);
 
diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index 83ccc51a54d0..c0513fbb8a59 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -882,11 +882,13 @@ typedef struct smb_com_open_rsp {
 	__u8 OplockLevel;
 	__u16 Fid;
 	__le32 CreateAction;
-	__le64 CreationTime;
-	__le64 LastAccessTime;
-	__le64 LastWriteTime;
-	__le64 ChangeTime;
-	__le32 FileAttributes;
+	struct_group(common_attributes,
+		__le64 CreationTime;
+		__le64 LastAccessTime;
+		__le64 LastWriteTime;
+		__le64 ChangeTime;
+		__le32 FileAttributes;
+	);
 	__le64 AllocationSize;
 	__le64 EndOfFile;
 	__le16 FileType;
@@ -2264,11 +2266,13 @@ typedef struct {
 /* QueryFileInfo/QueryPathinfo (also for SetPath/SetFile) data buffer formats */
 /******************************************************************************/
 typedef struct { /* data block encoding of response to level 263 QPathInfo */
-	__le64 CreationTime;
-	__le64 LastAccessTime;
-	__le64 LastWriteTime;
-	__le64 ChangeTime;
-	__le32 Attributes;
+	struct_group(common_attributes,
+		__le64 CreationTime;
+		__le64 LastAccessTime;
+		__le64 LastWriteTime;
+		__le64 ChangeTime;
+		__le32 Attributes;
+	);
 	__u32 Pad1;
 	__le64 AllocationSize;
 	__le64 EndOfFile;	/* size ie offset to first free byte in file */
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index bad91ba6c3a9..9ee348e6d106 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1244,8 +1244,10 @@ openRetry:
 		*oplock |= CIFS_CREATE_ACTION;
 
 	if (buf) {
-		/* copy from CreationTime to Attributes */
-		memcpy((char *)buf, (char *)&rsp->CreationTime, 36);
+		/* copy commonly used attributes */
+		memcpy(&buf->common_attributes,
+		       &rsp->common_attributes,
+		       sizeof(buf->common_attributes));
 		/* the file_info buf is endian converted by caller */
 		buf->AllocationSize = rsp->AllocationSize;
 		buf->EndOfFile = rsp->EndOfFile;
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index f896f60c924b..9dc6dc2754c2 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -402,13 +402,7 @@ static int __cifs_reconnect(struct TCP_Server_Info *server,
 			spin_unlock(&server->srv_lock);
 			cifs_swn_reset_server_dstaddr(server);
 			cifs_server_unlock(server);
-
-			/* increase ref count which reconnect work will drop */
-			spin_lock(&cifs_tcp_ses_lock);
-			server->srv_count++;
-			spin_unlock(&cifs_tcp_ses_lock);
-			if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0))
-				cifs_put_tcp_session(server, false);
+			mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
 		}
 	} while (server->tcpStatus == CifsNeedReconnect);
 
@@ -538,13 +532,7 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server)
 		spin_unlock(&server->srv_lock);
 		cifs_swn_reset_server_dstaddr(server);
 		cifs_server_unlock(server);
-
-		/* increase ref count which reconnect work will drop */
-		spin_lock(&cifs_tcp_ses_lock);
-		server->srv_count++;
-		spin_unlock(&cifs_tcp_ses_lock);
-		if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0))
-			cifs_put_tcp_session(server, false);
+		mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
 	} while (server->tcpStatus == CifsNeedReconnect);
 
 	mutex_lock(&server->refpath_lock);
@@ -1620,25 +1608,22 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
 	list_del_init(&server->tcp_ses_list);
 	spin_unlock(&cifs_tcp_ses_lock);
 
-	/* For secondary channels, we pick up ref-count on the primary server */
-	if (SERVER_IS_CHAN(server))
-		cifs_put_tcp_session(server->primary_server, from_reconnect);
-
 	cancel_delayed_work_sync(&server->echo);
 
-	if (from_reconnect) {
+	if (from_reconnect)
 		/*
 		 * Avoid deadlock here: reconnect work calls
 		 * cifs_put_tcp_session() at its end. Need to be sure
 		 * that reconnect work does nothing with server pointer after
 		 * that step.
 		 */
-		if (cancel_delayed_work(&server->reconnect))
-			cifs_put_tcp_session(server, from_reconnect);
-	} else {
-		if (cancel_delayed_work_sync(&server->reconnect))
-			cifs_put_tcp_session(server, from_reconnect);
-	}
+		cancel_delayed_work(&server->reconnect);
+	else
+		cancel_delayed_work_sync(&server->reconnect);
+
+	/* For secondary channels, we pick up ref-count on the primary server */
+	if (SERVER_IS_CHAN(server))
+		cifs_put_tcp_session(server->primary_server, from_reconnect);
 
 	spin_lock(&server->srv_lock);
 	server->tcpStatus = CifsExiting;
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 47f49be69ced..09c5c0f5c96e 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -790,7 +790,7 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb,
 	case 0: /* SMB1 symlink */
 	case IO_REPARSE_TAG_SYMLINK:
 	case IO_REPARSE_TAG_NFS:
-		fattr->cf_mode = S_IFLNK;
+		fattr->cf_mode = S_IFLNK | cifs_sb->ctx->file_mode;
 		fattr->cf_dtype = DT_LNK;
 		break;
 	default:
@@ -865,6 +865,8 @@ static void cifs_open_info_to_fattr(struct cifs_fattr *fattr,
 
 out_reparse:
 	if (S_ISLNK(fattr->cf_mode)) {
+		if (likely(data->symlink_target))
+			fattr->cf_eof = strnlen(data->symlink_target, PATH_MAX);
 		fattr->cf_symlink_target = data->symlink_target;
 		data->symlink_target = NULL;
 	}
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 82ab62fd0040..fcfb6566b899 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -2836,6 +2836,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
 		usleep_range(512, 2048);
 	} while (++retry_count < 5);
 
+	if (!rc && !dfs_rsp)
+		rc = -EIO;
 	if (rc) {
 		if (!is_retryable_error(rc) && rc != -ENOENT && rc != -EOPNOTSUPP)
 			cifs_tcon_dbg(VFS, "%s: ioctl error: rc=%d\n", __func__, rc);
@@ -3311,6 +3313,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
 	struct inode *inode = file_inode(file);
 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 	struct cifsFileInfo *cfile = file->private_data;
+	unsigned long long new_size;
 	long rc;
 	unsigned int xid;
 	__le64 eof;
@@ -3341,10 +3344,15 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
 	/*
 	 * do we also need to change the size of the file?
 	 */
-	if (keep_size == false && i_size_read(inode) < offset + len) {
-		eof = cpu_to_le64(offset + len);
+	new_size = offset + len;
+	if (keep_size == false && (unsigned long long)i_size_read(inode) < new_size) {
+		eof = cpu_to_le64(new_size);
 		rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
 				  cfile->fid.volatile_fid, cfile->pid, &eof);
+		if (rc >= 0) {
+			truncate_setsize(inode, new_size);
+			fscache_resize_cookie(cifs_inode_cookie(inode), new_size);
+		}
 	}
 
  zero_range_exit:
@@ -3739,6 +3747,9 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon,
 	if (rc < 0)
 		goto out_2;
 
+	truncate_setsize(inode, old_eof + len);
+	fscache_resize_cookie(cifs_inode_cookie(inode), i_size_read(inode));
+
 	rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len);
 	if (rc < 0)
 		goto out_2;
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 2eb29fa278c3..20634fc6d4f0 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -158,7 +158,7 @@ out:
 
 static int
 smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
-	       struct TCP_Server_Info *server)
+	       struct TCP_Server_Info *server, bool from_reconnect)
 {
 	int rc = 0;
 	struct nls_table *nls_codepage = NULL;
@@ -331,7 +331,7 @@ again:
 				 * as cifs_put_tcp_session takes a higher lock
 				 * i.e. cifs_tcp_ses_lock
 				 */
-				cifs_put_tcp_session(server, 1);
+				cifs_put_tcp_session(server, from_reconnect);
 
 				server->terminate = true;
 				cifs_signal_cifsd_for_reconnect(server, false);
@@ -499,7 +499,7 @@ static int smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
 {
 	int rc;
 
-	rc = smb2_reconnect(smb2_command, tcon, server);
+	rc = smb2_reconnect(smb2_command, tcon, server, false);
 	if (rc)
 		return rc;
 
@@ -3472,12 +3472,10 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
 	} else {
 		trace_smb3_close_done(xid, persistent_fid, tcon->tid,
 				      ses->Suid);
-		/*
-		 * Note that have to subtract 4 since struct network_open_info
-		 * has a final 4 byte pad that close response does not have
-		 */
 		if (pbuf)
-			memcpy(pbuf, (char *)&rsp->CreationTime, sizeof(*pbuf) - 4);
+			memcpy(&pbuf->network_open_info,
+			       &rsp->network_open_info,
+			       sizeof(pbuf->network_open_info));
 	}
 
 	atomic_dec(&tcon->num_remote_opens);
@@ -3897,6 +3895,15 @@ void smb2_reconnect_server(struct work_struct *work)
 	int rc;
 	bool resched = false;
 
+	/* first check if ref count has reached 0, if not inc ref count */
+	spin_lock(&cifs_tcp_ses_lock);
+	if (!server->srv_count) {
+		spin_unlock(&cifs_tcp_ses_lock);
+		return;
+	}
+	server->srv_count++;
+	spin_unlock(&cifs_tcp_ses_lock);
+
 	/* If server is a channel, select the primary channel */
 	pserver = SERVER_IS_CHAN(server) ? server->primary_server : server;
 
@@ -3954,11 +3961,10 @@ void smb2_reconnect_server(struct work_struct *work)
 		}
 		spin_unlock(&ses->chan_lock);
 	}
-
 	spin_unlock(&cifs_tcp_ses_lock);
 
 	list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) {
-		rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server);
+		rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server, true);
 		if (!rc)
 			cifs_reopen_persistent_handles(tcon);
 		else
@@ -3991,7 +3997,7 @@ void smb2_reconnect_server(struct work_struct *work)
 	/* now reconnect sessions for necessary channels */
 	list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) {
 		tcon->ses = ses;
-		rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server);
+		rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server, true);
 		if (rc)
 			resched = true;
 		list_del_init(&ses->rlist);
@@ -4001,13 +4007,8 @@ void smb2_reconnect_server(struct work_struct *work)
 
 done:
 	cifs_dbg(FYI, "Reconnecting tcons and channels finished\n");
-	if (resched) {
+	if (resched)
 		queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ);
-		mutex_unlock(&pserver->reconnect_mutex);
-
-		/* no need to put tcp session as we're retrying */
-		return;
-	}
 	mutex_unlock(&pserver->reconnect_mutex);
 
 	/* now we can safely release srv struct */
@@ -4031,12 +4032,7 @@ SMB2_echo(struct TCP_Server_Info *server)
 	    server->ops->need_neg(server)) {
 		spin_unlock(&server->srv_lock);
 		/* No need to send echo on newly established connections */
-		spin_lock(&cifs_tcp_ses_lock);
-		server->srv_count++;
-		spin_unlock(&cifs_tcp_ses_lock);
-		if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0))
-			cifs_put_tcp_session(server, false);
-
+		mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
 		return rc;
 	}
 	spin_unlock(&server->srv_lock);
diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h
index 220994d0a0f7..db08194484e0 100644
--- a/fs/smb/client/smb2pdu.h
+++ b/fs/smb/client/smb2pdu.h
@@ -319,13 +319,15 @@ struct smb2_file_reparse_point_info {
 } __packed;
 
 struct smb2_file_network_open_info {
-	__le64 CreationTime;
-	__le64 LastAccessTime;
-	__le64 LastWriteTime;
-	__le64 ChangeTime;
-	__le64 AllocationSize;
-	__le64 EndOfFile;
-	__le32 Attributes;
+	struct_group(network_open_info,
+		__le64 CreationTime;
+		__le64 LastAccessTime;
+		__le64 LastWriteTime;
+		__le64 ChangeTime;
+		__le64 AllocationSize;
+		__le64 EndOfFile;
+		__le32 Attributes;
+	);
 	__le32 Reserved;
 } __packed; /* level 34 Query also similar returned in close rsp and open rsp */
 
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 8983f45f8430..9fbaaa387dcc 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -702,13 +702,16 @@ struct smb2_close_rsp {
 	__le16 StructureSize; /* 60 */
 	__le16 Flags;
 	__le32 Reserved;
-	__le64 CreationTime;
-	__le64 LastAccessTime;
-	__le64 LastWriteTime;
-	__le64 ChangeTime;
-	__le64 AllocationSize;	/* Beginning of FILE_STANDARD_INFO equivalent */
-	__le64 EndOfFile;
-	__le32 Attributes;
+	struct_group(network_open_info,
+		__le64 CreationTime;
+		__le64 LastAccessTime;
+		__le64 LastWriteTime;
+		__le64 ChangeTime;
+		/* Beginning of FILE_STANDARD_INFO equivalent */
+		__le64 AllocationSize;
+		__le64 EndOfFile;
+		__le32 Attributes;
+	);
 } __packed;
 
 
diff --git a/fs/smb/server/ksmbd_work.c b/fs/smb/server/ksmbd_work.c
index a2ed441e837a..d7c676c151e2 100644
--- a/fs/smb/server/ksmbd_work.c
+++ b/fs/smb/server/ksmbd_work.c
@@ -56,6 +56,9 @@ void ksmbd_free_work_struct(struct ksmbd_work *work)
 	kfree(work->tr_buf);
 	kvfree(work->request_buf);
 	kfree(work->iov);
+	if (!list_empty(&work->interim_entry))
+		list_del(&work->interim_entry);
+
 	if (work->async_id)
 		ksmbd_release_id(&work->conn->async_ida, work->async_id);
 	kmem_cache_free(work_cache, work);
@@ -106,7 +109,7 @@ static inline void __ksmbd_iov_pin(struct ksmbd_work *work, void *ib,
 static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len,
 			       void *aux_buf, unsigned int aux_size)
 {
-	struct aux_read *ar;
+	struct aux_read *ar = NULL;
 	int need_iov_cnt = 1;
 
 	if (aux_size) {
@@ -123,8 +126,11 @@ static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len,
 		new = krealloc(work->iov,
 			       sizeof(struct kvec) * work->iov_alloc_cnt,
 			       GFP_KERNEL | __GFP_ZERO);
-		if (!new)
+		if (!new) {
+			kfree(ar);
+			work->iov_alloc_cnt -= 4;
 			return -ENOMEM;
+		}
 		work->iov = new;
 	}
 
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 9bc0103720f5..50c68beb71d6 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -833,7 +833,8 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo)
 					     interim_entry);
 			setup_async_work(in_work, NULL, NULL);
 			smb2_send_interim_resp(in_work, STATUS_PENDING);
-			list_del(&in_work->interim_entry);
+			list_del_init(&in_work->interim_entry);
+			release_async_work(in_work);
 		}
 		INIT_WORK(&work->work, __smb2_lease_break_noti);
 		ksmbd_queue_work(work);
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 658209839729..d369b98a6e10 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -657,13 +657,9 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls)
 
 int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg)
 {
-	struct smb2_hdr *rsp_hdr;
 	struct ksmbd_conn *conn = work->conn;
 	int id;
 
-	rsp_hdr = ksmbd_resp_buf_next(work);
-	rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND;
-
 	id = ksmbd_acquire_async_msg_id(&conn->async_ida);
 	if (id < 0) {
 		pr_err("Failed to alloc async message id\n");
@@ -671,7 +667,6 @@ int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg)
 	}
 	work->asynchronous = true;
 	work->async_id = id;
-	rsp_hdr->Id.AsyncId = cpu_to_le64(id);
 
 	ksmbd_debug(SMB,
 		    "Send interim Response to inform async request id : %d\n",
@@ -723,6 +718,8 @@ void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status)
 	       __SMB2_HEADER_STRUCTURE_SIZE);
 
 	rsp_hdr = smb2_get_msg(in_work->response_buf);
+	rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND;
+	rsp_hdr->Id.AsyncId = cpu_to_le64(work->async_id);
 	smb2_set_err_rsp(in_work);
 	rsp_hdr->Status = status;
 
@@ -2380,7 +2377,8 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
 			rc = 0;
 		} else {
 			rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value,
-						le16_to_cpu(eabuf->EaValueLength), 0);
+						le16_to_cpu(eabuf->EaValueLength),
+						0, true);
 			if (rc < 0) {
 				ksmbd_debug(SMB,
 					    "ksmbd_vfs_setxattr is failed(%d)\n",
@@ -2443,7 +2441,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path,
 		return -EBADF;
 	}
 
-	rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0);
+	rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0, false);
 	if (rc < 0)
 		pr_err("Failed to store XATTR stream name :%d\n", rc);
 	return 0;
@@ -2518,7 +2516,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path *
 	da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
 		XATTR_DOSINFO_ITIME;
 
-	rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da);
+	rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da, false);
 	if (rc)
 		ksmbd_debug(SMB, "failed to store file attribute into xattr\n");
 }
@@ -2608,7 +2606,7 @@ static int smb2_create_sd_buffer(struct ksmbd_work *work,
 	    sizeof(struct create_sd_buf_req))
 		return -EINVAL;
 	return set_info_sec(work->conn, work->tcon, path, &sd_buf->ntsd,
-			    le32_to_cpu(sd_buf->ccontext.DataLength), true);
+			    le32_to_cpu(sd_buf->ccontext.DataLength), true, false);
 }
 
 static void ksmbd_acls_fattr(struct smb_fattr *fattr,
@@ -2690,7 +2688,7 @@ int smb2_open(struct ksmbd_work *work)
 		    *(char *)req->Buffer == '\\') {
 			pr_err("not allow directory name included leading slash\n");
 			rc = -EINVAL;
-			goto err_out1;
+			goto err_out2;
 		}
 
 		name = smb2_get_name(req->Buffer,
@@ -2701,7 +2699,7 @@ int smb2_open(struct ksmbd_work *work)
 			if (rc != -ENOMEM)
 				rc = -ENOENT;
 			name = NULL;
-			goto err_out1;
+			goto err_out2;
 		}
 
 		ksmbd_debug(SMB, "converted name = %s\n", name);
@@ -2709,28 +2707,28 @@ int smb2_open(struct ksmbd_work *work)
 			if (!test_share_config_flag(work->tcon->share_conf,
 						    KSMBD_SHARE_FLAG_STREAMS)) {
 				rc = -EBADF;
-				goto err_out1;
+				goto err_out2;
 			}
 			rc = parse_stream_name(name, &stream_name, &s_type);
 			if (rc < 0)
-				goto err_out1;
+				goto err_out2;
 		}
 
 		rc = ksmbd_validate_filename(name);
 		if (rc < 0)
-			goto err_out1;
+			goto err_out2;
 
 		if (ksmbd_share_veto_filename(share, name)) {
 			rc = -ENOENT;
 			ksmbd_debug(SMB, "Reject open(), vetoed file: %s\n",
 				    name);
-			goto err_out1;
+			goto err_out2;
 		}
 	} else {
 		name = kstrdup("", GFP_KERNEL);
 		if (!name) {
 			rc = -ENOMEM;
-			goto err_out1;
+			goto err_out2;
 		}
 	}
 
@@ -2743,14 +2741,14 @@ int smb2_open(struct ksmbd_work *work)
 		       le32_to_cpu(req->ImpersonationLevel));
 		rc = -EIO;
 		rsp->hdr.Status = STATUS_BAD_IMPERSONATION_LEVEL;
-		goto err_out1;
+		goto err_out2;
 	}
 
 	if (req->CreateOptions && !(req->CreateOptions & CREATE_OPTIONS_MASK_LE)) {
 		pr_err("Invalid create options : 0x%x\n",
 		       le32_to_cpu(req->CreateOptions));
 		rc = -EINVAL;
-		goto err_out1;
+		goto err_out2;
 	} else {
 		if (req->CreateOptions & FILE_SEQUENTIAL_ONLY_LE &&
 		    req->CreateOptions & FILE_RANDOM_ACCESS_LE)
@@ -2760,13 +2758,13 @@ int smb2_open(struct ksmbd_work *work)
 		    (FILE_OPEN_BY_FILE_ID_LE | CREATE_TREE_CONNECTION |
 		     FILE_RESERVE_OPFILTER_LE)) {
 			rc = -EOPNOTSUPP;
-			goto err_out1;
+			goto err_out2;
 		}
 
 		if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) {
 			if (req->CreateOptions & FILE_NON_DIRECTORY_FILE_LE) {
 				rc = -EINVAL;
-				goto err_out1;
+				goto err_out2;
 			} else if (req->CreateOptions & FILE_NO_COMPRESSION_LE) {
 				req->CreateOptions = ~(FILE_NO_COMPRESSION_LE);
 			}
@@ -2778,21 +2776,21 @@ int smb2_open(struct ksmbd_work *work)
 		pr_err("Invalid create disposition : 0x%x\n",
 		       le32_to_cpu(req->CreateDisposition));
 		rc = -EINVAL;
-		goto err_out1;
+		goto err_out2;
 	}
 
 	if (!(req->DesiredAccess & DESIRED_ACCESS_MASK)) {
 		pr_err("Invalid desired access : 0x%x\n",
 		       le32_to_cpu(req->DesiredAccess));
 		rc = -EACCES;
-		goto err_out1;
+		goto err_out2;
 	}
 
 	if (req->FileAttributes && !(req->FileAttributes & FILE_ATTRIBUTE_MASK_LE)) {
 		pr_err("Invalid file attribute : 0x%x\n",
 		       le32_to_cpu(req->FileAttributes));
 		rc = -EINVAL;
-		goto err_out1;
+		goto err_out2;
 	}
 
 	if (req->CreateContextsOffset) {
@@ -2800,19 +2798,19 @@ int smb2_open(struct ksmbd_work *work)
 		context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER, 4);
 		if (IS_ERR(context)) {
 			rc = PTR_ERR(context);
-			goto err_out1;
+			goto err_out2;
 		} else if (context) {
 			ea_buf = (struct create_ea_buf_req *)context;
 			if (le16_to_cpu(context->DataOffset) +
 			    le32_to_cpu(context->DataLength) <
 			    sizeof(struct create_ea_buf_req)) {
 				rc = -EINVAL;
-				goto err_out1;
+				goto err_out2;
 			}
 			if (req->CreateOptions & FILE_NO_EA_KNOWLEDGE_LE) {
 				rsp->hdr.Status = STATUS_ACCESS_DENIED;
 				rc = -EACCES;
-				goto err_out1;
+				goto err_out2;
 			}
 		}
 
@@ -2820,7 +2818,7 @@ int smb2_open(struct ksmbd_work *work)
 						 SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST, 4);
 		if (IS_ERR(context)) {
 			rc = PTR_ERR(context);
-			goto err_out1;
+			goto err_out2;
 		} else if (context) {
 			ksmbd_debug(SMB,
 				    "get query maximal access context\n");
@@ -2831,11 +2829,11 @@ int smb2_open(struct ksmbd_work *work)
 						 SMB2_CREATE_TIMEWARP_REQUEST, 4);
 		if (IS_ERR(context)) {
 			rc = PTR_ERR(context);
-			goto err_out1;
+			goto err_out2;
 		} else if (context) {
 			ksmbd_debug(SMB, "get timewarp context\n");
 			rc = -EBADF;
-			goto err_out1;
+			goto err_out2;
 		}
 
 		if (tcon->posix_extensions) {
@@ -2843,7 +2841,7 @@ int smb2_open(struct ksmbd_work *work)
 							 SMB2_CREATE_TAG_POSIX, 16);
 			if (IS_ERR(context)) {
 				rc = PTR_ERR(context);
-				goto err_out1;
+				goto err_out2;
 			} else if (context) {
 				struct create_posix *posix =
 					(struct create_posix *)context;
@@ -2851,7 +2849,7 @@ int smb2_open(struct ksmbd_work *work)
 				    le32_to_cpu(context->DataLength) <
 				    sizeof(struct create_posix) - 4) {
 					rc = -EINVAL;
-					goto err_out1;
+					goto err_out2;
 				}
 				ksmbd_debug(SMB, "get posix context\n");
 
@@ -2863,7 +2861,7 @@ int smb2_open(struct ksmbd_work *work)
 
 	if (ksmbd_override_fsids(work)) {
 		rc = -ENOMEM;
-		goto err_out1;
+		goto err_out2;
 	}
 
 	rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS,
@@ -3038,7 +3036,7 @@ int smb2_open(struct ksmbd_work *work)
 		}
 	}
 
-	rc = ksmbd_query_inode_status(d_inode(path.dentry->d_parent));
+	rc = ksmbd_query_inode_status(path.dentry->d_parent);
 	if (rc == KSMBD_INODE_STATUS_PENDING_DELETE) {
 		rc = -EBUSY;
 		goto err_out;
@@ -3152,7 +3150,8 @@ int smb2_open(struct ksmbd_work *work)
 								    idmap,
 								    &path,
 								    pntsd,
-								    pntsd_size);
+								    pntsd_size,
+								    false);
 					kfree(pntsd);
 					if (rc)
 						pr_err("failed to store ntacl in xattr : %d\n",
@@ -3175,11 +3174,6 @@ int smb2_open(struct ksmbd_work *work)
 
 	fp->attrib_only = !(req->DesiredAccess & ~(FILE_READ_ATTRIBUTES_LE |
 			FILE_WRITE_ATTRIBUTES_LE | FILE_SYNCHRONIZE_LE));
-	if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC &&
-	    !fp->attrib_only && !stream_name) {
-		smb_break_all_oplock(work, fp);
-		need_truncate = 1;
-	}
 
 	/* fp should be searchable through ksmbd_inode.m_fp_list
 	 * after daccess, saccess, attrib_only, and stream are
@@ -3195,13 +3189,39 @@ int smb2_open(struct ksmbd_work *work)
 		goto err_out;
 	}
 
+	rc = ksmbd_vfs_getattr(&path, &stat);
+	if (rc)
+		goto err_out;
+
+	if (stat.result_mask & STATX_BTIME)
+		fp->create_time = ksmbd_UnixTimeToNT(stat.btime);
+	else
+		fp->create_time = ksmbd_UnixTimeToNT(stat.ctime);
+	if (req->FileAttributes || fp->f_ci->m_fattr == 0)
+		fp->f_ci->m_fattr =
+			cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes)));
+
+	if (!created)
+		smb2_update_xattrs(tcon, &path, fp);
+	else
+		smb2_new_xattrs(tcon, &path, fp);
+
+	if (file_present || created)
+		ksmbd_vfs_kern_path_unlock(&parent_path, &path);
+
+	if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC &&
+	    !fp->attrib_only && !stream_name) {
+		smb_break_all_oplock(work, fp);
+		need_truncate = 1;
+	}
+
 	share_ret = ksmbd_smb_check_shared_mode(fp->filp, fp);
 	if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_OPLOCKS) ||
 	    (req_op_level == SMB2_OPLOCK_LEVEL_LEASE &&
 	     !(conn->vals->capabilities & SMB2_GLOBAL_CAP_LEASING))) {
 		if (share_ret < 0 && !S_ISDIR(file_inode(fp->filp)->i_mode)) {
 			rc = share_ret;
-			goto err_out;
+			goto err_out1;
 		}
 	} else {
 		if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) {
@@ -3211,7 +3231,7 @@ int smb2_open(struct ksmbd_work *work)
 				    name, req_op_level, lc->req_state);
 			rc = find_same_lease_key(sess, fp->f_ci, lc);
 			if (rc)
-				goto err_out;
+				goto err_out1;
 		} else if (open_flags == O_RDONLY &&
 			   (req_op_level == SMB2_OPLOCK_LEVEL_BATCH ||
 			    req_op_level == SMB2_OPLOCK_LEVEL_EXCLUSIVE))
@@ -3222,16 +3242,16 @@ int smb2_open(struct ksmbd_work *work)
 				      le32_to_cpu(req->hdr.Id.SyncId.TreeId),
 				      lc, share_ret);
 		if (rc < 0)
-			goto err_out;
+			goto err_out1;
 	}
 
 	if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)
 		ksmbd_fd_set_delete_on_close(fp, file_info);
 
 	if (need_truncate) {
-		rc = smb2_create_truncate(&path);
+		rc = smb2_create_truncate(&fp->filp->f_path);
 		if (rc)
-			goto err_out;
+			goto err_out1;
 	}
 
 	if (req->CreateContextsOffset) {
@@ -3241,7 +3261,7 @@ int smb2_open(struct ksmbd_work *work)
 					SMB2_CREATE_ALLOCATION_SIZE, 4);
 		if (IS_ERR(az_req)) {
 			rc = PTR_ERR(az_req);
-			goto err_out;
+			goto err_out1;
 		} else if (az_req) {
 			loff_t alloc_size;
 			int err;
@@ -3250,7 +3270,7 @@ int smb2_open(struct ksmbd_work *work)
 			    le32_to_cpu(az_req->ccontext.DataLength) <
 			    sizeof(struct create_alloc_size_req)) {
 				rc = -EINVAL;
-				goto err_out;
+				goto err_out1;
 			}
 			alloc_size = le64_to_cpu(az_req->AllocationSize);
 			ksmbd_debug(SMB,
@@ -3268,30 +3288,13 @@ int smb2_open(struct ksmbd_work *work)
 		context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID, 4);
 		if (IS_ERR(context)) {
 			rc = PTR_ERR(context);
-			goto err_out;
+			goto err_out1;
 		} else if (context) {
 			ksmbd_debug(SMB, "get query on disk id context\n");
 			query_disk_id = 1;
 		}
 	}
 
-	rc = ksmbd_vfs_getattr(&path, &stat);
-	if (rc)
-		goto err_out;
-
-	if (stat.result_mask & STATX_BTIME)
-		fp->create_time = ksmbd_UnixTimeToNT(stat.btime);
-	else
-		fp->create_time = ksmbd_UnixTimeToNT(stat.ctime);
-	if (req->FileAttributes || fp->f_ci->m_fattr == 0)
-		fp->f_ci->m_fattr =
-			cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes)));
-
-	if (!created)
-		smb2_update_xattrs(tcon, &path, fp);
-	else
-		smb2_new_xattrs(tcon, &path, fp);
-
 	memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
 
 	rsp->StructureSize = cpu_to_le16(89);
@@ -3398,13 +3401,13 @@ int smb2_open(struct ksmbd_work *work)
 	}
 
 err_out:
-	if (file_present || created) {
-		inode_unlock(d_inode(parent_path.dentry));
-		path_put(&path);
-		path_put(&parent_path);
-	}
-	ksmbd_revert_fsids(work);
+	if (rc && (file_present || created))
+		ksmbd_vfs_kern_path_unlock(&parent_path, &path);
+
 err_out1:
+	ksmbd_revert_fsids(work);
+
+err_out2:
 	if (!rc) {
 		ksmbd_update_fstate(&work->sess->file_table, fp, FP_INITED);
 		rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len);
@@ -5537,7 +5540,7 @@ static int smb2_rename(struct ksmbd_work *work,
 		rc = ksmbd_vfs_setxattr(file_mnt_idmap(fp->filp),
 					&fp->filp->f_path,
 					xattr_stream_name,
-					NULL, 0, 0);
+					NULL, 0, 0, true);
 		if (rc < 0) {
 			pr_err("failed to store stream name in xattr: %d\n",
 			       rc);
@@ -5630,11 +5633,9 @@ static int smb2_create_link(struct ksmbd_work *work,
 	if (rc)
 		rc = -EINVAL;
 out:
-	if (file_present) {
-		inode_unlock(d_inode(parent_path.dentry));
-		path_put(&path);
-		path_put(&parent_path);
-	}
+	if (file_present)
+		ksmbd_vfs_kern_path_unlock(&parent_path, &path);
+
 	if (!IS_ERR(link_name))
 		kfree(link_name);
 	kfree(pathname);
@@ -5701,7 +5702,8 @@ static int set_file_basic_info(struct ksmbd_file *fp,
 		da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
 			XATTR_DOSINFO_ITIME;
 
-		rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da);
+		rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da,
+				true);
 		if (rc)
 			ksmbd_debug(SMB,
 				    "failed to restore file attribute in EA\n");
@@ -6013,7 +6015,7 @@ static int smb2_set_info_sec(struct ksmbd_file *fp, int addition_info,
 	fp->saccess |= FILE_SHARE_DELETE_LE;
 
 	return set_info_sec(fp->conn, fp->tcon, &fp->filp->f_path, pntsd,
-			buf_len, false);
+			buf_len, false, true);
 }
 
 /**
@@ -7582,7 +7584,8 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id,
 
 		da.attr = le32_to_cpu(fp->f_ci->m_fattr);
 		ret = ksmbd_vfs_set_dos_attrib_xattr(idmap,
-						     &fp->filp->f_path, &da);
+						     &fp->filp->f_path,
+						     &da, true);
 		if (ret)
 			fp->f_ci->m_fattr = old_fattr;
 	}
@@ -8231,7 +8234,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
 		return;
 
 err_out:
-	opinfo->op_state = OPLOCK_STATE_NONE;
 	wake_up_interruptible_all(&opinfo->oplock_q);
 	atomic_dec(&opinfo->breaking_cnt);
 	wake_up_interruptible_all(&opinfo->oplock_brk);
diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c
index 51b8bfab7481..1164365533f0 100644
--- a/fs/smb/server/smbacl.c
+++ b/fs/smb/server/smbacl.c
@@ -1185,7 +1185,7 @@ pass:
 			pntsd_size += sizeof(struct smb_acl) + nt_size;
 		}
 
-		ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size);
+		ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size, false);
 		kfree(pntsd);
 	}
 
@@ -1377,7 +1377,7 @@ err_out:
 
 int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
 		 const struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
-		 bool type_check)
+		 bool type_check, bool get_write)
 {
 	int rc;
 	struct smb_fattr fattr = {{0}};
@@ -1437,7 +1437,8 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
 	if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) {
 		/* Update WinACL in xattr */
 		ksmbd_vfs_remove_sd_xattrs(idmap, path);
-		ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len);
+		ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len,
+				get_write);
 	}
 
 out:
diff --git a/fs/smb/server/smbacl.h b/fs/smb/server/smbacl.h
index 49a8c292bd2e..2b52861707d8 100644
--- a/fs/smb/server/smbacl.h
+++ b/fs/smb/server/smbacl.h
@@ -207,7 +207,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
 			__le32 *pdaccess, int uid);
 int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
 		 const struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
-		 bool type_check);
+		 bool type_check, bool get_write);
 void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid);
 void ksmbd_init_domain(u32 *sub_auth);
 
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index c53dea5598fc..9091dcd7a310 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -97,6 +97,13 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
 		return -ENOENT;
 	}
 
+	err = mnt_want_write(parent_path->mnt);
+	if (err) {
+		path_put(parent_path);
+		putname(filename);
+		return -ENOENT;
+	}
+
 	inode_lock_nested(parent_path->dentry->d_inode, I_MUTEX_PARENT);
 	d = lookup_one_qstr_excl(&last, parent_path->dentry, 0);
 	if (IS_ERR(d))
@@ -123,6 +130,7 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
 
 err_out:
 	inode_unlock(d_inode(parent_path->dentry));
+	mnt_drop_write(parent_path->mnt);
 	path_put(parent_path);
 	putname(filename);
 	return -ENOENT;
@@ -451,7 +459,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
 				 fp->stream.name,
 				 (void *)stream_buf,
 				 size,
-				 0);
+				 0,
+				 true);
 	if (err < 0)
 		goto out;
 
@@ -593,10 +602,6 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path)
 		goto out_err;
 	}
 
-	err = mnt_want_write(path->mnt);
-	if (err)
-		goto out_err;
-
 	idmap = mnt_idmap(path->mnt);
 	if (S_ISDIR(d_inode(path->dentry)->i_mode)) {
 		err = vfs_rmdir(idmap, d_inode(parent), path->dentry);
@@ -607,7 +612,6 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path)
 		if (err)
 			ksmbd_debug(VFS, "unlink failed, err %d\n", err);
 	}
-	mnt_drop_write(path->mnt);
 
 out_err:
 	ksmbd_revert_fsids(work);
@@ -715,7 +719,7 @@ retry:
 		goto out3;
 	}
 
-	parent_fp = ksmbd_lookup_fd_inode(d_inode(old_child->d_parent));
+	parent_fp = ksmbd_lookup_fd_inode(old_child->d_parent);
 	if (parent_fp) {
 		if (parent_fp->daccess & FILE_DELETE_LE) {
 			pr_err("parent dir is opened with delete access\n");
@@ -907,18 +911,22 @@ ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap,
  * @attr_value:	xattr value to set
  * @attr_size:	size of xattr value
  * @flags:	destination buffer length
+ * @get_write:	get write access to a mount
  *
  * Return:	0 on success, otherwise error
  */
 int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
 		       const struct path *path, const char *attr_name,
-		       void *attr_value, size_t attr_size, int flags)
+		       void *attr_value, size_t attr_size, int flags,
+		       bool get_write)
 {
 	int err;
 
-	err = mnt_want_write(path->mnt);
-	if (err)
-		return err;
+	if (get_write == true) {
+		err = mnt_want_write(path->mnt);
+		if (err)
+			return err;
+	}
 
 	err = vfs_setxattr(idmap,
 			   path->dentry,
@@ -928,7 +936,8 @@ int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
 			   flags);
 	if (err)
 		ksmbd_debug(VFS, "setxattr failed, err %d\n", err);
-	mnt_drop_write(path->mnt);
+	if (get_write == true)
+		mnt_drop_write(path->mnt);
 	return err;
 }
 
@@ -1252,6 +1261,13 @@ out1:
 	}
 
 	if (!err) {
+		err = mnt_want_write(parent_path->mnt);
+		if (err) {
+			path_put(path);
+			path_put(parent_path);
+			return err;
+		}
+
 		err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry);
 		if (err) {
 			path_put(path);
@@ -1261,6 +1277,14 @@ out1:
 	return err;
 }
 
+void ksmbd_vfs_kern_path_unlock(struct path *parent_path, struct path *path)
+{
+	inode_unlock(d_inode(parent_path->dentry));
+	mnt_drop_write(parent_path->mnt);
+	path_put(path);
+	path_put(parent_path);
+}
+
 struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
 					  const char *name,
 					  unsigned int flags,
@@ -1415,7 +1439,8 @@ out:
 int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
 			   struct mnt_idmap *idmap,
 			   const struct path *path,
-			   struct smb_ntsd *pntsd, int len)
+			   struct smb_ntsd *pntsd, int len,
+			   bool get_write)
 {
 	int rc;
 	struct ndr sd_ndr = {0}, acl_ndr = {0};
@@ -1475,7 +1500,7 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
 
 	rc = ksmbd_vfs_setxattr(idmap, path,
 				XATTR_NAME_SD, sd_ndr.data,
-				sd_ndr.offset, 0);
+				sd_ndr.offset, 0, get_write);
 	if (rc < 0)
 		pr_err("Failed to store XATTR ntacl :%d\n", rc);
 
@@ -1564,7 +1589,8 @@ free_n_data:
 
 int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
 				   const struct path *path,
-				   struct xattr_dos_attrib *da)
+				   struct xattr_dos_attrib *da,
+				   bool get_write)
 {
 	struct ndr n;
 	int err;
@@ -1574,7 +1600,7 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
 		return err;
 
 	err = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_DOS_ATTRIBUTE,
-				 (void *)n.data, n.offset, 0);
+				 (void *)n.data, n.offset, 0, get_write);
 	if (err)
 		ksmbd_debug(SMB, "failed to store dos attribute in xattr\n");
 	kfree(n.data);
@@ -1846,10 +1872,6 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
 	}
 	posix_state_to_acl(&acl_state, acls->a_entries);
 
-	rc = mnt_want_write(path->mnt);
-	if (rc)
-		goto out_err;
-
 	rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
 	if (rc < 0)
 		ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
@@ -1861,9 +1883,7 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
 			ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
 				    rc);
 	}
-	mnt_drop_write(path->mnt);
 
-out_err:
 	free_acl_state(&acl_state);
 	posix_acl_release(acls);
 	return rc;
@@ -1893,10 +1913,6 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
 		}
 	}
 
-	rc = mnt_want_write(path->mnt);
-	if (rc)
-		goto out_err;
-
 	rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
 	if (rc < 0)
 		ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
@@ -1908,9 +1924,7 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
 			ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
 				    rc);
 	}
-	mnt_drop_write(path->mnt);
 
-out_err:
 	posix_acl_release(acls);
 	return rc;
 }
diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h
index 00968081856e..cfe1c8092f23 100644
--- a/fs/smb/server/vfs.h
+++ b/fs/smb/server/vfs.h
@@ -109,7 +109,8 @@ ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap,
 				int attr_name_len);
 int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
 		       const struct path *path, const char *attr_name,
-		       void *attr_value, size_t attr_size, int flags);
+		       void *attr_value, size_t attr_size, int flags,
+		       bool get_write);
 int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
 				size_t *xattr_stream_name_size, int s_type);
 int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
@@ -117,6 +118,7 @@ int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
 int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
 			       unsigned int flags, struct path *parent_path,
 			       struct path *path, bool caseless);
+void ksmbd_vfs_kern_path_unlock(struct path *parent_path, struct path *path);
 struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
 					  const char *name,
 					  unsigned int flags,
@@ -144,14 +146,16 @@ int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path)
 int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
 			   struct mnt_idmap *idmap,
 			   const struct path *path,
-			   struct smb_ntsd *pntsd, int len);
+			   struct smb_ntsd *pntsd, int len,
+			   bool get_write);
 int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
 			   struct mnt_idmap *idmap,
 			   struct dentry *dentry,
 			   struct smb_ntsd **pntsd);
 int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
 				   const struct path *path,
-				   struct xattr_dos_attrib *da);
+				   struct xattr_dos_attrib *da,
+				   bool get_write);
 int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap,
 				   struct dentry *dentry,
 				   struct xattr_dos_attrib *da);
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
index c91eac6514dd..ddf233994ddb 100644
--- a/fs/smb/server/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -66,14 +66,14 @@ static unsigned long inode_hash(struct super_block *sb, unsigned long hashval)
 	return tmp & inode_hash_mask;
 }
 
-static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode)
+static struct ksmbd_inode *__ksmbd_inode_lookup(struct dentry *de)
 {
 	struct hlist_head *head = inode_hashtable +
-		inode_hash(inode->i_sb, inode->i_ino);
+		inode_hash(d_inode(de)->i_sb, (unsigned long)de);
 	struct ksmbd_inode *ci = NULL, *ret_ci = NULL;
 
 	hlist_for_each_entry(ci, head, m_hash) {
-		if (ci->m_inode == inode) {
+		if (ci->m_de == de) {
 			if (atomic_inc_not_zero(&ci->m_count))
 				ret_ci = ci;
 			break;
@@ -84,26 +84,16 @@ static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode)
 
 static struct ksmbd_inode *ksmbd_inode_lookup(struct ksmbd_file *fp)
 {
-	return __ksmbd_inode_lookup(file_inode(fp->filp));
+	return __ksmbd_inode_lookup(fp->filp->f_path.dentry);
 }
 
-static struct ksmbd_inode *ksmbd_inode_lookup_by_vfsinode(struct inode *inode)
-{
-	struct ksmbd_inode *ci;
-
-	read_lock(&inode_hash_lock);
-	ci = __ksmbd_inode_lookup(inode);
-	read_unlock(&inode_hash_lock);
-	return ci;
-}
-
-int ksmbd_query_inode_status(struct inode *inode)
+int ksmbd_query_inode_status(struct dentry *dentry)
 {
 	struct ksmbd_inode *ci;
 	int ret = KSMBD_INODE_STATUS_UNKNOWN;
 
 	read_lock(&inode_hash_lock);
-	ci = __ksmbd_inode_lookup(inode);
+	ci = __ksmbd_inode_lookup(dentry);
 	if (ci) {
 		ret = KSMBD_INODE_STATUS_OK;
 		if (ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS))
@@ -143,7 +133,7 @@ void ksmbd_fd_set_delete_on_close(struct ksmbd_file *fp,
 static void ksmbd_inode_hash(struct ksmbd_inode *ci)
 {
 	struct hlist_head *b = inode_hashtable +
-		inode_hash(ci->m_inode->i_sb, ci->m_inode->i_ino);
+		inode_hash(d_inode(ci->m_de)->i_sb, (unsigned long)ci->m_de);
 
 	hlist_add_head(&ci->m_hash, b);
 }
@@ -157,7 +147,6 @@ static void ksmbd_inode_unhash(struct ksmbd_inode *ci)
 
 static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp)
 {
-	ci->m_inode = file_inode(fp->filp);
 	atomic_set(&ci->m_count, 1);
 	atomic_set(&ci->op_count, 0);
 	atomic_set(&ci->sop_count, 0);
@@ -166,6 +155,7 @@ static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp)
 	INIT_LIST_HEAD(&ci->m_fp_list);
 	INIT_LIST_HEAD(&ci->m_op_list);
 	rwlock_init(&ci->m_lock);
+	ci->m_de = fp->filp->f_path.dentry;
 	return 0;
 }
 
@@ -488,12 +478,15 @@ struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid)
 	return fp;
 }
 
-struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode)
+struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry)
 {
 	struct ksmbd_file	*lfp;
 	struct ksmbd_inode	*ci;
+	struct inode		*inode = d_inode(dentry);
 
-	ci = ksmbd_inode_lookup_by_vfsinode(inode);
+	read_lock(&inode_hash_lock);
+	ci = __ksmbd_inode_lookup(dentry);
+	read_unlock(&inode_hash_lock);
 	if (!ci)
 		return NULL;
 
diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h
index 03d0bf941216..8325cf4527c4 100644
--- a/fs/smb/server/vfs_cache.h
+++ b/fs/smb/server/vfs_cache.h
@@ -51,7 +51,7 @@ struct ksmbd_inode {
 	atomic_t			op_count;
 	/* opinfo count for streams */
 	atomic_t			sop_count;
-	struct inode			*m_inode;
+	struct dentry			*m_de;
 	unsigned int			m_flags;
 	struct hlist_node		m_hash;
 	struct list_head		m_fp_list;
@@ -140,7 +140,7 @@ struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id,
 void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp);
 struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id);
 struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid);
-struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode);
+struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry);
 unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp);
 struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp);
 void ksmbd_close_tree_conn_fds(struct ksmbd_work *work);
@@ -164,7 +164,7 @@ enum KSMBD_INODE_STATUS {
 	KSMBD_INODE_STATUS_PENDING_DELETE,
 };
 
-int ksmbd_query_inode_status(struct inode *inode);
+int ksmbd_query_inode_status(struct dentry *dentry);
 bool ksmbd_inode_pending_delete(struct ksmbd_file *fp);
 void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp);
 void ksmbd_clear_inode_pending_delete(struct ksmbd_file *fp);
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 581ce9519339..2dc730800f44 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -321,7 +321,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
 		TRACE("Block @ 0x%llx, %scompressed size %d\n", index - 2,
 		      compressed ? "" : "un", length);
 	}
-	if (length < 0 || length > output->length ||
+	if (length <= 0 || length > output->length ||
 			(index + length) > msblk->bytes_used) {
 		res = -EIO;
 		goto out;
diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h
index fea528aacfe2..9aa0a05aa072 100644
--- a/include/drm/drm_atomic_helper.h
+++ b/include/drm/drm_atomic_helper.h
@@ -96,6 +96,8 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev,
 
 int drm_atomic_helper_prepare_planes(struct drm_device *dev,
 				     struct drm_atomic_state *state);
+void drm_atomic_helper_unprepare_planes(struct drm_device *dev,
+					struct drm_atomic_state *state);
 
 #define DRM_PLANE_COMMIT_ACTIVE_ONLY			BIT(0)
 #define DRM_PLANE_COMMIT_NO_DISABLE_AFTER_MODESET	BIT(1)
diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h
index b5bf0b6da791..f1a66c048721 100644
--- a/include/drm/drm_exec.h
+++ b/include/drm/drm_exec.h
@@ -135,7 +135,7 @@ static inline bool drm_exec_is_contended(struct drm_exec *exec)
 	return !!exec->contended;
 }
 
-void drm_exec_init(struct drm_exec *exec, uint32_t flags);
+void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr);
 void drm_exec_fini(struct drm_exec *exec);
 bool drm_exec_cleanup(struct drm_exec *exec);
 int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj);
diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index f3fdb810989a..9060f9fae6f1 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
 
 #ifndef __DRM_GPUVM_H__
 #define __DRM_GPUVM_H__
@@ -1018,6 +1018,16 @@ struct drm_gpuva_ops {
 	list_for_each_entry_from_reverse(op, &(ops)->list, entry)
 
 /**
+ * drm_gpuva_for_each_op_reverse - iterator to walk over &drm_gpuva_ops in reverse
+ * @op: &drm_gpuva_op to assign in each iteration step
+ * @ops: &drm_gpuva_ops to walk
+ *
+ * This iterator walks over all ops within a given list of operations in reverse
+ */
+#define drm_gpuva_for_each_op_reverse(op, ops) \
+	list_for_each_entry_reverse(op, &(ops)->list, entry)
+
+/**
  * drm_gpuva_first_op() - returns the first &drm_gpuva_op from &drm_gpuva_ops
  * @ops: the &drm_gpuva_ops to get the fist &drm_gpuva_op from
  */
diff --git a/include/drm/drm_mode_object.h b/include/drm/drm_mode_object.h
index 912f1e415685..08d7a7f0188f 100644
--- a/include/drm/drm_mode_object.h
+++ b/include/drm/drm_mode_object.h
@@ -60,7 +60,7 @@ struct drm_mode_object {
 	void (*free_cb)(struct kref *kref);
 };
 
-#define DRM_OBJECT_MAX_PROPERTY 24
+#define DRM_OBJECT_MAX_PROPERTY 64
 /**
  * struct drm_object_properties - property tracking for &drm_mode_object
  */
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index c6565a6f9324..641fe298052d 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -251,6 +251,13 @@ struct drm_plane_state {
 
 	/** @state: backpointer to global drm_atomic_state */
 	struct drm_atomic_state *state;
+
+	/**
+	 * @color_mgmt_changed: Color management properties have changed. Used
+	 * by the atomic helpers and drivers to steer the atomic commit control
+	 * flow.
+	 */
+	bool color_mgmt_changed : 1;
 };
 
 static inline struct drm_rect
diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h
index a7abf9f3e697..2a1d01e5b56b 100644
--- a/include/drm/drm_prime.h
+++ b/include/drm/drm_prime.h
@@ -60,12 +60,19 @@ enum dma_data_direction;
 
 struct drm_device;
 struct drm_gem_object;
+struct drm_file;
 
 /* core prime functions */
 struct dma_buf *drm_gem_dmabuf_export(struct drm_device *dev,
 				      struct dma_buf_export_info *exp_info);
 void drm_gem_dmabuf_release(struct dma_buf *dma_buf);
 
+int drm_gem_prime_fd_to_handle(struct drm_device *dev,
+			       struct drm_file *file_priv, int prime_fd, uint32_t *handle);
+int drm_gem_prime_handle_to_fd(struct drm_device *dev,
+			       struct drm_file *file_priv, uint32_t handle, uint32_t flags,
+			       int *prime_fd);
+
 /* helper functions for exporting */
 int drm_gem_map_attach(struct dma_buf *dma_buf,
 		       struct dma_buf_attachment *attach);
diff --git a/include/drm/drm_property.h b/include/drm/drm_property.h
index 65bc9710a470..082f29156b3e 100644
--- a/include/drm/drm_property.h
+++ b/include/drm/drm_property.h
@@ -279,6 +279,12 @@ struct drm_property_blob *drm_property_create_blob(struct drm_device *dev,
 						   const void *data);
 struct drm_property_blob *drm_property_lookup_blob(struct drm_device *dev,
 						   uint32_t id);
+int drm_property_replace_blob_from_id(struct drm_device *dev,
+				      struct drm_property_blob **blob,
+				      uint64_t blob_id,
+				      ssize_t expected_size,
+				      ssize_t expected_elem_size,
+				      bool *replaced);
 int drm_property_replace_global_blob(struct drm_device *dev,
 				     struct drm_property_blob **replace,
 				     size_t length,
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 1c9ea6ab3eb9..fcf1849aa47c 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -738,7 +738,8 @@
 	INTEL_DG2_G12_IDS(info)
 
 #define INTEL_ATS_M150_IDS(info) \
-	INTEL_VGA_DEVICE(0x56C0, info)
+	INTEL_VGA_DEVICE(0x56C0, info), \
+	INTEL_VGA_DEVICE(0x56C2, info)
 
 #define INTEL_ATS_M75_IDS(info) \
 	INTEL_VGA_DEVICE(0x56C1, info)
diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
new file mode 100644
index 000000000000..de1a344737bc
--- /dev/null
+++ b/include/drm/xe_pciids.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PCIIDS_H_
+#define _XE_PCIIDS_H_
+
+/*
+ * Lists below can be turned into initializers for a struct pci_device_id
+ * by defining INTEL_VGA_DEVICE:
+ *
+ * #define INTEL_VGA_DEVICE(id, info) { \
+ *	0x8086, id,			\
+ *	~0, ~0,				\
+ *	0x030000, 0xff0000,		\
+ *	(unsigned long) info }
+ *
+ * And then calling like:
+ *
+ * XE_TGL_12_GT1_IDS(INTEL_VGA_DEVICE, ## __VA_ARGS__)
+ *
+ * To turn them into something else, just provide a different macro passed as
+ * first argument.
+ */
+
+/* TGL */
+#define XE_TGL_GT1_IDS(MACRO__, ...)		\
+	MACRO__(0x9A60, ## __VA_ARGS__),	\
+	MACRO__(0x9A68, ## __VA_ARGS__),	\
+	MACRO__(0x9A70, ## __VA_ARGS__)
+
+#define XE_TGL_GT2_IDS(MACRO__, ...)		\
+	MACRO__(0x9A40, ## __VA_ARGS__),	\
+	MACRO__(0x9A49, ## __VA_ARGS__),	\
+	MACRO__(0x9A59, ## __VA_ARGS__),	\
+	MACRO__(0x9A78, ## __VA_ARGS__),	\
+	MACRO__(0x9AC0, ## __VA_ARGS__),	\
+	MACRO__(0x9AC9, ## __VA_ARGS__),	\
+	MACRO__(0x9AD9, ## __VA_ARGS__),	\
+	MACRO__(0x9AF8, ## __VA_ARGS__)
+
+#define XE_TGL_IDS(MACRO__, ...)		\
+	XE_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__),\
+	XE_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__)
+
+/* RKL */
+#define XE_RKL_IDS(MACRO__, ...)		\
+	MACRO__(0x4C80, ## __VA_ARGS__),	\
+	MACRO__(0x4C8A, ## __VA_ARGS__),	\
+	MACRO__(0x4C8B, ## __VA_ARGS__),	\
+	MACRO__(0x4C8C, ## __VA_ARGS__),	\
+	MACRO__(0x4C90, ## __VA_ARGS__),	\
+	MACRO__(0x4C9A, ## __VA_ARGS__)
+
+/* DG1 */
+#define XE_DG1_IDS(MACRO__, ...)		\
+	MACRO__(0x4905, ## __VA_ARGS__),	\
+	MACRO__(0x4906, ## __VA_ARGS__),	\
+	MACRO__(0x4907, ## __VA_ARGS__),	\
+	MACRO__(0x4908, ## __VA_ARGS__),	\
+	MACRO__(0x4909, ## __VA_ARGS__)
+
+/* ADL-S */
+#define XE_ADLS_IDS(MACRO__, ...)		\
+	MACRO__(0x4680, ## __VA_ARGS__),	\
+	MACRO__(0x4682, ## __VA_ARGS__),	\
+	MACRO__(0x4688, ## __VA_ARGS__),	\
+	MACRO__(0x468A, ## __VA_ARGS__),	\
+	MACRO__(0x468B, ## __VA_ARGS__),	\
+	MACRO__(0x4690, ## __VA_ARGS__),	\
+	MACRO__(0x4692, ## __VA_ARGS__),	\
+	MACRO__(0x4693, ## __VA_ARGS__)
+
+/* ADL-P */
+#define XE_ADLP_IDS(MACRO__, ...)		\
+	MACRO__(0x46A0, ## __VA_ARGS__),	\
+	MACRO__(0x46A1, ## __VA_ARGS__),	\
+	MACRO__(0x46A2, ## __VA_ARGS__),	\
+	MACRO__(0x46A3, ## __VA_ARGS__),	\
+	MACRO__(0x46A6, ## __VA_ARGS__),	\
+	MACRO__(0x46A8, ## __VA_ARGS__),	\
+	MACRO__(0x46AA, ## __VA_ARGS__),	\
+	MACRO__(0x462A, ## __VA_ARGS__),	\
+	MACRO__(0x4626, ## __VA_ARGS__),	\
+	MACRO__(0x4628, ## __VA_ARGS__),	\
+	MACRO__(0x46B0, ## __VA_ARGS__),	\
+	MACRO__(0x46B1, ## __VA_ARGS__),	\
+	MACRO__(0x46B2, ## __VA_ARGS__),	\
+	MACRO__(0x46B3, ## __VA_ARGS__),	\
+	MACRO__(0x46C0, ## __VA_ARGS__),	\
+	MACRO__(0x46C1, ## __VA_ARGS__),	\
+	MACRO__(0x46C2, ## __VA_ARGS__),	\
+	MACRO__(0x46C3, ## __VA_ARGS__)
+
+/* ADL-N */
+#define XE_ADLN_IDS(MACRO__, ...)		\
+	MACRO__(0x46D0, ## __VA_ARGS__),	\
+	MACRO__(0x46D1, ## __VA_ARGS__),	\
+	MACRO__(0x46D2, ## __VA_ARGS__)
+
+/* RPL-S */
+#define XE_RPLS_IDS(MACRO__, ...)		\
+	MACRO__(0xA780, ## __VA_ARGS__),	\
+	MACRO__(0xA781, ## __VA_ARGS__),	\
+	MACRO__(0xA782, ## __VA_ARGS__),	\
+	MACRO__(0xA783, ## __VA_ARGS__),	\
+	MACRO__(0xA788, ## __VA_ARGS__),	\
+	MACRO__(0xA789, ## __VA_ARGS__),	\
+	MACRO__(0xA78A, ## __VA_ARGS__),	\
+	MACRO__(0xA78B, ## __VA_ARGS__)
+
+/* RPL-U */
+#define XE_RPLU_IDS(MACRO__, ...)		\
+	MACRO__(0xA721, ## __VA_ARGS__),	\
+	MACRO__(0xA7A1, ## __VA_ARGS__),	\
+	MACRO__(0xA7A9, ## __VA_ARGS__),	\
+	MACRO__(0xA7AC, ## __VA_ARGS__),	\
+	MACRO__(0xA7AD, ## __VA_ARGS__)
+
+/* RPL-P */
+#define XE_RPLP_IDS(MACRO__, ...)		\
+	XE_RPLU_IDS(MACRO__, ## __VA_ARGS__),	\
+	MACRO__(0xA720, ## __VA_ARGS__),	\
+	MACRO__(0xA7A0, ## __VA_ARGS__),	\
+	MACRO__(0xA7A8, ## __VA_ARGS__),	\
+	MACRO__(0xA7AA, ## __VA_ARGS__),	\
+	MACRO__(0xA7AB, ## __VA_ARGS__)
+
+/* DG2 */
+#define XE_DG2_G10_IDS(MACRO__, ...)		\
+	MACRO__(0x5690, ## __VA_ARGS__),	\
+	MACRO__(0x5691, ## __VA_ARGS__),	\
+	MACRO__(0x5692, ## __VA_ARGS__),	\
+	MACRO__(0x56A0, ## __VA_ARGS__),	\
+	MACRO__(0x56A1, ## __VA_ARGS__),	\
+	MACRO__(0x56A2, ## __VA_ARGS__)
+
+#define XE_DG2_G11_IDS(MACRO__, ...)		\
+	MACRO__(0x5693, ## __VA_ARGS__),	\
+	MACRO__(0x5694, ## __VA_ARGS__),	\
+	MACRO__(0x5695, ## __VA_ARGS__),	\
+	MACRO__(0x56A5, ## __VA_ARGS__),	\
+	MACRO__(0x56A6, ## __VA_ARGS__),	\
+	MACRO__(0x56B0, ## __VA_ARGS__),	\
+	MACRO__(0x56B1, ## __VA_ARGS__),	\
+	MACRO__(0x56BA, ## __VA_ARGS__),	\
+	MACRO__(0x56BB, ## __VA_ARGS__),	\
+	MACRO__(0x56BC, ## __VA_ARGS__),	\
+	MACRO__(0x56BD, ## __VA_ARGS__)
+
+#define XE_DG2_G12_IDS(MACRO__, ...)		\
+	MACRO__(0x5696, ## __VA_ARGS__),	\
+	MACRO__(0x5697, ## __VA_ARGS__),	\
+	MACRO__(0x56A3, ## __VA_ARGS__),	\
+	MACRO__(0x56A4, ## __VA_ARGS__),	\
+	MACRO__(0x56B2, ## __VA_ARGS__),	\
+	MACRO__(0x56B3, ## __VA_ARGS__)
+
+#define XE_DG2_IDS(MACRO__, ...)		\
+	XE_DG2_G10_IDS(MACRO__, ## __VA_ARGS__),\
+	XE_DG2_G11_IDS(MACRO__, ## __VA_ARGS__),\
+	XE_DG2_G12_IDS(MACRO__, ## __VA_ARGS__)
+
+#define XE_ATS_M150_IDS(MACRO__, ...)		\
+	MACRO__(0x56C0, ## __VA_ARGS__),	\
+	MACRO__(0x56C2, ## __VA_ARGS__)
+
+#define XE_ATS_M75_IDS(MACRO__, ...)		\
+	MACRO__(0x56C1, ## __VA_ARGS__)
+
+#define XE_ATS_M_IDS(MACRO__, ...)		\
+	XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\
+	XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__)
+
+/* MTL / ARL */
+#define XE_MTL_IDS(MACRO__, ...)		\
+	MACRO__(0x7D40, ## __VA_ARGS__),	\
+	MACRO__(0x7D45, ## __VA_ARGS__),	\
+	MACRO__(0x7D55, ## __VA_ARGS__),	\
+	MACRO__(0x7D60, ## __VA_ARGS__),	\
+	MACRO__(0x7D67, ## __VA_ARGS__),	\
+	MACRO__(0x7DD5, ## __VA_ARGS__)
+
+#define XE_LNL_IDS(MACRO__, ...) \
+	MACRO__(0x6420, ## __VA_ARGS__), \
+	MACRO__(0x64A0, ## __VA_ARGS__), \
+	MACRO__(0x64B0, ## __VA_ARGS__)
+
+#endif
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 54189e0e5f41..4db54e928b36 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -15,7 +15,6 @@
 #include <linux/mod_devicetable.h>
 #include <linux/property.h>
 #include <linux/uuid.h>
-#include <linux/fw_table.h>
 
 struct irq_domain;
 struct irq_domain_ops;
@@ -25,22 +24,13 @@ struct irq_domain_ops;
 #endif
 #include <acpi/acpi.h>
 
-#ifdef CONFIG_ACPI_TABLE_LIB
-#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI)
-#define __init_or_acpilib
-#define __initdata_or_acpilib
-#else
-#define EXPORT_SYMBOL_ACPI_LIB(x)
-#define __init_or_acpilib __init
-#define __initdata_or_acpilib __initdata
-#endif
-
 #ifdef	CONFIG_ACPI
 
 #include <linux/list.h>
 #include <linux/dynamic_debug.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/fw_table.h>
 
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
@@ -48,6 +38,16 @@ struct irq_domain_ops;
 #include <acpi/acpi_io.h>
 #include <asm/acpi.h>
 
+#ifdef CONFIG_ACPI_TABLE_LIB
+#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI)
+#define __init_or_acpilib
+#define __initdata_or_acpilib
+#else
+#define EXPORT_SYMBOL_ACPI_LIB(x)
+#define __init_or_acpilib __init
+#define __initdata_or_acpilib __initdata
+#endif
+
 static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
 {
 	return adev ? adev->handle : NULL;
diff --git a/include/linux/acpi_amd_wbrf.h b/include/linux/acpi_amd_wbrf.h
new file mode 100644
index 000000000000..898f31d536d4
--- /dev/null
+++ b/include/linux/acpi_amd_wbrf.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Wifi Band Exclusion Interface (AMD ACPI Implementation)
+ * Copyright (C) 2023 Advanced Micro Devices
+ */
+
+#ifndef _ACPI_AMD_WBRF_H
+#define _ACPI_AMD_WBRF_H
+
+#include <linux/device.h>
+#include <linux/notifier.h>
+
+/* The maximum number of frequency band ranges */
+#define MAX_NUM_OF_WBRF_RANGES		11
+
+/* Record actions */
+#define WBRF_RECORD_ADD		0x0
+#define WBRF_RECORD_REMOVE	0x1
+
+/**
+ * struct freq_band_range - Wifi frequency band range definition
+ * @start: start frequency point (in Hz)
+ * @end: end frequency point (in Hz)
+ */
+struct freq_band_range {
+	u64		start;
+	u64		end;
+};
+
+/**
+ * struct wbrf_ranges_in_out - wbrf ranges info
+ * @num_of_ranges: total number of band ranges in this struct
+ * @band_list: array of Wifi band ranges
+ */
+struct wbrf_ranges_in_out {
+	u64			num_of_ranges;
+	struct freq_band_range	band_list[MAX_NUM_OF_WBRF_RANGES];
+};
+
+/**
+ * enum wbrf_notifier_actions - wbrf notifier actions index
+ * @WBRF_CHANGED: there was some frequency band updates. The consumers
+ *               should retrieve the latest active frequency bands.
+ */
+enum wbrf_notifier_actions {
+	WBRF_CHANGED,
+};
+
+#if IS_ENABLED(CONFIG_AMD_WBRF)
+bool acpi_amd_wbrf_supported_producer(struct device *dev);
+int acpi_amd_wbrf_add_remove(struct device *dev, uint8_t action, struct wbrf_ranges_in_out *in);
+bool acpi_amd_wbrf_supported_consumer(struct device *dev);
+int amd_wbrf_retrieve_freq_band(struct device *dev, struct wbrf_ranges_in_out *out);
+int amd_wbrf_register_notifier(struct notifier_block *nb);
+int amd_wbrf_unregister_notifier(struct notifier_block *nb);
+#else
+static inline
+bool acpi_amd_wbrf_supported_consumer(struct device *dev)
+{
+	return false;
+}
+
+static inline
+int acpi_amd_wbrf_add_remove(struct device *dev, uint8_t action, struct wbrf_ranges_in_out *in)
+{
+	return -ENODEV;
+}
+
+static inline
+bool acpi_amd_wbrf_supported_producer(struct device *dev)
+{
+	return false;
+}
+static inline
+int amd_wbrf_retrieve_freq_band(struct device *dev, struct wbrf_ranges_in_out *out)
+{
+	return -ENODEV;
+}
+static inline
+int amd_wbrf_register_notifier(struct notifier_block *nb)
+{
+	return -ENODEV;
+}
+static inline
+int amd_wbrf_unregister_notifier(struct notifier_block *nb)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_AMD_WBRF */
+
+#endif /* _ACPI_AMD_WBRF_H */
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
index 446394f84606..6ad02ad9c7b4 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
@@ -70,6 +70,10 @@ struct amd_cpudata {
 	u32	nominal_perf;
 	u32	lowest_nonlinear_perf;
 	u32	lowest_perf;
+	u32     min_limit_perf;
+	u32     max_limit_perf;
+	u32     min_limit_freq;
+	u32     max_limit_freq;
 
 	u32	max_freq;
 	u32	min_freq;
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index 1abedb5b2e48..3d0fde57ba90 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -209,6 +209,8 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; }
 #define module_ffa_driver(__ffa_driver)	\
 	module_driver(__ffa_driver, ffa_register, ffa_unregister)
 
+extern struct bus_type ffa_bus_type;
+
 /* FFA transport related */
 struct ffa_partition_info {
 	u16 id;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d5c5e59ddbd2..b29ebd53417d 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -49,9 +49,10 @@ struct block_device {
 	bool			bd_write_holder;
 	bool			bd_has_submit_bio;
 	dev_t			bd_dev;
+	struct inode		*bd_inode;	/* will die */
+
 	atomic_t		bd_openers;
 	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
-	struct inode *		bd_inode;	/* will die */
 	void *			bd_claiming;
 	void *			bd_holder;
 	const struct blk_holder_ops *bd_holder_ops;
@@ -69,6 +70,7 @@ struct block_device {
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	bool			bd_make_it_fail;
 #endif
+	bool			bd_ro_warned;
 	/*
 	 * keep this out-of-line as it's both big and not needed in the fast
 	 * path
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6762dac3ef76..cff5bb08820e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3175,6 +3175,9 @@ enum bpf_text_poke_type {
 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 		       void *addr1, void *addr2);
 
+void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+			       struct bpf_prog *new, struct bpf_prog *old);
+
 void *bpf_arch_text_copy(void *dst, void *src, size_t len);
 int bpf_arch_text_invalidate(void *dst, size_t len);
 
diff --git a/include/linux/closure.h b/include/linux/closure.h
index de7bb47d8a46..c554c6a08768 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -104,7 +104,7 @@
 
 struct closure;
 struct closure_syncer;
-typedef void (closure_fn) (struct closure *);
+typedef void (closure_fn) (struct work_struct *);
 extern struct dentry *bcache_debug;
 
 struct closure_waitlist {
@@ -254,7 +254,7 @@ static inline void closure_queue(struct closure *cl)
 		INIT_WORK(&cl->work, cl->work.func);
 		BUG_ON(!queue_work(wq, &cl->work));
 	} else
-		cl->fn(cl);
+		cl->fn(&cl->work);
 }
 
 /**
@@ -309,6 +309,11 @@ static inline void closure_wake_up(struct closure_waitlist *list)
 	__closure_wake_up(list);
 }
 
+#define CLOSURE_CALLBACK(name)	void name(struct work_struct *ws)
+#define closure_type(name, type, member)				\
+	struct closure *cl = container_of(ws, struct closure, work);	\
+	type *name = container_of(cl, type, member)
+
 /**
  * continue_at - jump to another function with barrier
  *
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index ea2d919fd9c7..c9c65b132c0f 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -171,6 +171,25 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf,
 ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf,
 			      size_t count, loff_t *ppos);
 
+/**
+ * struct debugfs_cancellation - cancellation data
+ * @list: internal, for keeping track
+ * @cancel: callback to call
+ * @cancel_data: extra data for the callback to call
+ */
+struct debugfs_cancellation {
+	struct list_head list;
+	void (*cancel)(struct dentry *, void *);
+	void *cancel_data;
+};
+
+void __acquires(cancellation)
+debugfs_enter_cancellation(struct file *file,
+			   struct debugfs_cancellation *cancellation);
+void __releases(cancellation)
+debugfs_leave_cancellation(struct file *file,
+			   struct debugfs_cancellation *cancellation);
+
 #else
 
 #include <linux/err.h>
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index ebe78bd3d121..b3772edca2e6 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -499,6 +499,21 @@ static inline bool dma_fence_is_later(struct dma_fence *f1,
 }
 
 /**
+ * dma_fence_is_later_or_same - return true if f1 is later or same as f2
+ * @f1: the first fence from the same context
+ * @f2: the second fence from the same context
+ *
+ * Returns true if f1 is chronologically later than f2 or the same fence. Both
+ * fences must be from the same context, since a seqno is not re-used across
+ * contexts.
+ */
+static inline bool dma_fence_is_later_or_same(struct dma_fence *f1,
+					      struct dma_fence *f2)
+{
+	return f1 == f2 || dma_fence_is_later(f1, f2);
+}
+
+/**
  * dma_fence_later - return the chronologically later fence
  * @f1:	the first fence from the same context
  * @f2:	the second fence from the same context
diff --git a/include/linux/fw_table.h b/include/linux/fw_table.h
index ff8fa58d5818..ca49947f0a77 100644
--- a/include/linux/fw_table.h
+++ b/include/linux/fw_table.h
@@ -25,9 +25,6 @@ struct acpi_subtable_proc {
 	int count;
 };
 
-#include <linux/acpi.h>
-#include <acpi/acpi.h>
-
 union acpi_subtable_headers {
 	struct acpi_subtable_header common;
 	struct acpi_hmat_structure hmat;
diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h
index 86ea7c63a0d2..f316c8d0f3fc 100644
--- a/include/linux/habanalabs/cpucp_if.h
+++ b/include/linux/habanalabs/cpucp_if.h
@@ -659,6 +659,12 @@ enum pq_init_status {
  *       number (nonce) provided by the host to prevent replay attacks.
  *       public key and certificate also provided as part of the FW response.
  *
+ * CPUCP_PACKET_INFO_SIGNED_GET -
+ *       Get the device information signed by the Trusted Platform device.
+ *       device info data is also hashed with some unique number (nonce) provided
+ *       by the host to prevent replay attacks. public key and certificate also
+ *       provided as part of the FW response.
+ *
  * CPUCP_PACKET_MONITOR_DUMP_GET -
  *       Get monitors registers dump from the CpuCP kernel.
  *       The CPU will put the registers dump in the a buffer allocated by the driver
@@ -733,7 +739,7 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_ENGINE_CORE_ASID_SET,	/* internal */
 	CPUCP_PACKET_RESERVED2,			/* not used */
 	CPUCP_PACKET_SEC_ATTEST_GET,		/* internal */
-	CPUCP_PACKET_RESERVED3,			/* not used */
+	CPUCP_PACKET_INFO_SIGNED_GET,		/* internal */
 	CPUCP_PACKET_RESERVED4,			/* not used */
 	CPUCP_PACKET_MONITOR_DUMP_GET,		/* debugfs */
 	CPUCP_PACKET_RESERVED5,			/* not used */
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 4cacc0e43b51..be20cff4ba73 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -454,7 +454,7 @@ static inline void memcpy_from_folio(char *to, struct folio *folio,
 		memcpy(to, from, chunk);
 		kunmap_local(from);
 
-		from += chunk;
+		to += chunk;
 		offset += chunk;
 		len -= chunk;
 	} while (len > 0);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d3acecc5db4b..236ec7b63c54 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -1268,10 +1268,7 @@ static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
 	return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
 }
 
-static inline bool __vma_private_lock(struct vm_area_struct *vma)
-{
-	return (!(vma->vm_flags & VM_MAYSHARE)) && vma->vm_private_data;
-}
+bool __vma_private_lock(struct vm_area_struct *vma);
 
 /*
  * Safe version of huge_pte_offset() to check the locks.  See comments
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 958771bac9c0..c2ac9e9e7ee9 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2830,12 +2830,14 @@ ieee80211_he_oper_size(const u8 *he_oper_ie)
 static inline const struct ieee80211_he_6ghz_oper *
 ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper)
 {
-	const u8 *ret = (const void *)&he_oper->optional;
+	const u8 *ret;
 	u32 he_oper_params;
 
 	if (!he_oper)
 		return NULL;
 
+	ret = (const void *)&he_oper->optional;
+
 	he_oper_params = le32_to_cpu(he_oper->he_oper_params);
 
 	if (!(he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO))
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index d3009d56af0b..805bb635cdf5 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -340,6 +340,9 @@ struct io_ring_ctx {
 
 	struct list_head	io_buffers_cache;
 
+	/* deferred free list, protected by ->uring_lock */
+	struct hlist_head	io_buf_list;
+
 	/* Keep this last, we don't need it for the fast path */
 	struct wait_queue_head		poll_wq;
 	struct io_restriction		restrictions;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ec289c1016f5..6291aa7b079b 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -845,6 +845,7 @@ static inline void dev_iommu_priv_set(struct device *dev, void *priv)
 	dev->iommu->priv = priv;
 }
 
+extern struct mutex iommu_probe_device_lock;
 int iommu_probe_device(struct device *dev);
 
 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index ab1da3142b06..0ff44d6633e3 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -139,7 +139,7 @@ static inline bool kprobe_ftrace(struct kprobe *p)
  *
  */
 struct kretprobe_holder {
-	struct kretprobe	*rp;
+	struct kretprobe __rcu *rp;
 	struct objpool_head	pool;
 };
 
@@ -197,10 +197,8 @@ extern int arch_trampoline_kprobe(struct kprobe *p);
 #ifdef CONFIG_KRETPROBE_ON_RETHOOK
 static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri)
 {
-	RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
-		"Kretprobe is accessed from instance under preemptive context");
-
-	return (struct kretprobe *)READ_ONCE(ri->node.rethook->data);
+	/* rethook::data is non-changed field, so that you can access it freely. */
+	return (struct kretprobe *)ri->node.rethook->data;
 }
 static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri)
 {
@@ -245,10 +243,7 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs,
 
 static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri)
 {
-	RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
-		"Kretprobe is accessed from instance under preemptive context");
-
-	return READ_ONCE(ri->rph->rp);
+	return rcu_dereference_check(ri->rph->rp, rcu_read_lock_any_held());
 }
 
 static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri)
diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
index 63e630276499..ab1c7deff118 100644
--- a/include/linux/platform_data/x86/asus-wmi.h
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -114,6 +114,9 @@
 /* Charging mode - 1=Barrel, 2=USB */
 #define ASUS_WMI_DEVID_CHARGE_MODE	0x0012006C
 
+/* MCU powersave mode */
+#define ASUS_WMI_DEVID_MCU_POWERSAVE   0x001200E2
+
 /* epu is connected? 1 == true */
 #define ASUS_WMI_DEVID_EGPU_CONNECTED	0x00090018
 /* egpu on/off */
diff --git a/include/linux/rethook.h b/include/linux/rethook.h
index ce69b2b7bc35..ba60962805f6 100644
--- a/include/linux/rethook.h
+++ b/include/linux/rethook.h
@@ -28,7 +28,12 @@ typedef void (*rethook_handler_t) (struct rethook_node *, void *, unsigned long,
  */
 struct rethook {
 	void			*data;
-	rethook_handler_t	handler;
+	/*
+	 * To avoid sparse warnings, this uses a raw function pointer with
+	 * __rcu, instead of rethook_handler_t. But this must be same as
+	 * rethook_handler_t.
+	 */
+	void (__rcu *handler) (struct rethook_node *, void *, unsigned long, struct pt_regs *);
 	struct objpool_head	pool;
 	struct rcu_head		rcu;
 };
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index c1637515a8a4..c953b8c0d2f4 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -106,6 +106,7 @@ struct sk_psock {
 	struct mutex			work_mutex;
 	struct sk_psock_work_state	work_state;
 	struct delayed_work		work;
+	struct sock			*sk_pair;
 	struct rcu_work			rwork;
 };
 
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 0b4658a7eceb..dee5ad6e48c5 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -175,6 +175,7 @@ struct stmmac_fpe_cfg {
 	bool hs_enable;				/* FPE handshake enable */
 	enum stmmac_fpe_state lp_fpe_state;	/* Link Partner FPE state */
 	enum stmmac_fpe_state lo_fpe_state;	/* Local station FPE state */
+	u32 fpe_csr;				/* MAC_FPE_CTRL_STS reg cache */
 };
 
 struct stmmac_safety_feature_cfg {
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 68f3d315d2e1..b646b574b060 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -169,7 +169,7 @@ struct tcp_request_sock {
 #ifdef CONFIG_TCP_AO
 	u8				ao_keyid;
 	u8				ao_rcv_next;
-	u8				maclen;
+	bool				used_tcp_ao;
 #endif
 };
 
@@ -180,14 +180,10 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
 
 static inline bool tcp_rsk_used_ao(const struct request_sock *req)
 {
-	/* The real length of MAC is saved in the request socket,
-	 * signing anything with zero-length makes no sense, so here is
-	 * a little hack..
-	 */
 #ifndef CONFIG_TCP_AO
 	return false;
 #else
-	return tcp_rsk(req)->maclen != 0;
+	return tcp_rsk(req)->used_tcp_ao;
 #endif
 }
 
diff --git a/include/linux/units.h b/include/linux/units.h
index ff1bd6b5f5b3..45110daaf8d3 100644
--- a/include/linux/units.h
+++ b/include/linux/units.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_UNITS_H
 #define _LINUX_UNITS_H
 
+#include <linux/bits.h>
 #include <linux/math.h>
 
 /* Metric prefixes in accordance with Système international (d'unités) */
diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h
index 287e9d83fb8b..33a4c146dc19 100644
--- a/include/linux/usb/r8152.h
+++ b/include/linux/usb/r8152.h
@@ -30,6 +30,7 @@
 #define VENDOR_ID_NVIDIA		0x0955
 #define VENDOR_ID_TPLINK		0x2357
 #define VENDOR_ID_DLINK			0x2001
+#define VENDOR_ID_ASUS			0x0b05
 
 #if IS_REACHABLE(CONFIG_USB_RTL8152)
 extern u8 rtl8152_get_version(struct usb_interface *intf);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 454e9295970c..a65b2513f8cd 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -289,16 +289,12 @@ void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes,
 /*
  * External user API
  */
-#if IS_ENABLED(CONFIG_VFIO_GROUP)
 struct iommu_group *vfio_file_iommu_group(struct file *file);
+
+#if IS_ENABLED(CONFIG_VFIO_GROUP)
 bool vfio_file_is_group(struct file *file);
 bool vfio_file_has_dev(struct file *file, struct vfio_device *device);
 #else
-static inline struct iommu_group *vfio_file_iommu_group(struct file *file)
-{
-	return NULL;
-}
-
 static inline bool vfio_file_is_group(struct file *file)
 {
 	return false;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 824c258143a3..49c4640027d8 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -75,6 +75,7 @@ struct unix_sock {
 };
 
 #define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk)
+#define unix_peer(sk) (unix_sk(sk)->peer)
 
 #define peer_wait peer_wq.wait
 
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index b137a33a1b68..4ecfb06c413d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -9299,4 +9299,50 @@ bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap,
  */
 void cfg80211_links_removed(struct net_device *dev, u16 link_mask);
 
+#ifdef CONFIG_CFG80211_DEBUGFS
+/**
+ * wiphy_locked_debugfs_read - do a locked read in debugfs
+ * @wiphy: the wiphy to use
+ * @file: the file being read
+ * @buf: the buffer to fill and then read from
+ * @bufsize: size of the buffer
+ * @userbuf: the user buffer to copy to
+ * @count: read count
+ * @ppos: read position
+ * @handler: the read handler to call (under wiphy lock)
+ * @data: additional data to pass to the read handler
+ */
+ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file,
+				  char *buf, size_t bufsize,
+				  char __user *userbuf, size_t count,
+				  loff_t *ppos,
+				  ssize_t (*handler)(struct wiphy *wiphy,
+						     struct file *file,
+						     char *buf,
+						     size_t bufsize,
+						     void *data),
+				  void *data);
+
+/**
+ * wiphy_locked_debugfs_write - do a locked write in debugfs
+ * @wiphy: the wiphy to use
+ * @file: the file being written to
+ * @buf: the buffer to copy the user data to
+ * @bufsize: size of the buffer
+ * @userbuf: the user buffer to copy from
+ * @count: read count
+ * @handler: the write handler to call (under wiphy lock)
+ * @data: additional data to pass to the write handler
+ */
+ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file,
+				   char *buf, size_t bufsize,
+				   const char __user *userbuf, size_t count,
+				   ssize_t (*handler)(struct wiphy *wiphy,
+						      struct file *file,
+						      char *buf,
+						      size_t count,
+						      void *data),
+				   void *data);
+#endif
+
 #endif /* __NET_CFG80211_H */
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index e18a4c0d69ee..c53244f20437 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -12,10 +12,12 @@
  * struct genl_multicast_group - generic netlink multicast group
  * @name: name of the multicast group, names are per-family
  * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM)
+ * @cap_sys_admin: whether %CAP_SYS_ADMIN is required for binding
  */
 struct genl_multicast_group {
 	char			name[GENL_NAMSIZ];
 	u8			flags;
+	u8			cap_sys_admin:1;
 };
 
 struct genl_split_ops;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 07022bb0d44d..0d28172193fa 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -162,7 +162,7 @@ struct neighbour {
 	struct rcu_head		rcu;
 	struct net_device	*dev;
 	netdevice_tracker	dev_tracker;
-	u8			primary_key[0];
+	u8			primary_key[];
 } __randomize_layout;
 
 struct neigh_ops {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d2f0736b76b8..144ba48bb07b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1514,17 +1514,22 @@ static inline int tcp_full_space(const struct sock *sk)
 	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
 }
 
-static inline void tcp_adjust_rcv_ssthresh(struct sock *sk)
+static inline void __tcp_adjust_rcv_ssthresh(struct sock *sk, u32 new_ssthresh)
 {
 	int unused_mem = sk_unused_reserved_mem(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
+	tp->rcv_ssthresh = min(tp->rcv_ssthresh, new_ssthresh);
 	if (unused_mem)
 		tp->rcv_ssthresh = max_t(u32, tp->rcv_ssthresh,
 					 tcp_win_from_space(sk, unused_mem));
 }
 
+static inline void tcp_adjust_rcv_ssthresh(struct sock *sk)
+{
+	__tcp_adjust_rcv_ssthresh(sk, 4U * tcp_sk(sk)->advmss);
+}
+
 void tcp_cleanup_rbuf(struct sock *sk, int copied);
 void __tcp_cleanup_rbuf(struct sock *sk, int copied);
 
diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
index b56be10838f0..647781080613 100644
--- a/include/net/tcp_ao.h
+++ b/include/net/tcp_ao.h
@@ -62,11 +62,17 @@ static inline int tcp_ao_maclen(const struct tcp_ao_key *key)
 	return key->maclen;
 }
 
+/* Use tcp_ao_len_aligned() for TCP header calculations */
 static inline int tcp_ao_len(const struct tcp_ao_key *key)
 {
 	return tcp_ao_maclen(key) + sizeof(struct tcp_ao_hdr);
 }
 
+static inline int tcp_ao_len_aligned(const struct tcp_ao_key *key)
+{
+	return round_up(tcp_ao_len(key), 4);
+}
+
 static inline unsigned int tcp_ao_digest_size(struct tcp_ao_key *key)
 {
 	return key->digest_size;
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 95896472a82b..565a85044541 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -77,6 +77,13 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
 {
 	__rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl,
 				umem->sgt_append.sgt.nents, pgsz);
+	biter->__sg_advance = ib_umem_offset(umem) & ~(pgsz - 1);
+	biter->__sg_numblocks = ib_umem_num_dma_blocks(umem, pgsz);
+}
+
+static inline bool __rdma_umem_block_iter_next(struct ib_block_iter *biter)
+{
+	return __rdma_block_iter_next(biter) && biter->__sg_numblocks--;
 }
 
 /**
@@ -92,7 +99,7 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
  */
 #define rdma_umem_for_each_dma_block(umem, biter, pgsz)                        \
 	for (__rdma_umem_block_iter_start(biter, umem, pgsz);                  \
-	     __rdma_block_iter_next(biter);)
+	     __rdma_umem_block_iter_next(biter);)
 
 #ifdef CONFIG_INFINIBAND_USER_MEM
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index fb1a2d6b1969..b7b6b58dd348 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2850,6 +2850,7 @@ struct ib_block_iter {
 	/* internal states */
 	struct scatterlist *__sg;	/* sg holding the current aligned block */
 	dma_addr_t __dma_addr;		/* unaligned DMA address of this block */
+	size_t __sg_numblocks;		/* ib_umem_num_dma_blocks() */
 	unsigned int __sg_nents;	/* number of SG entries */
 	unsigned int __sg_advance;	/* number of bytes to advance in sg in next step */
 	unsigned int __pg_bit;		/* alignment of current block */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 10480eb582b2..5ec1e71a09de 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -167,19 +167,25 @@ struct scsi_device {
 	 * power state for system suspend/resume (suspend to RAM and
 	 * hibernation) operations.
 	 */
-	bool manage_system_start_stop;
+	unsigned manage_system_start_stop:1;
 
 	/*
 	 * If true, let the high-level device driver (sd) manage the device
 	 * power state for runtime device suspand and resume operations.
 	 */
-	bool manage_runtime_start_stop;
+	unsigned manage_runtime_start_stop:1;
 
 	/*
 	 * If true, let the high-level device driver (sd) manage the device
 	 * power state for system shutdown (power off) operations.
 	 */
-	bool manage_shutdown;
+	unsigned manage_shutdown:1;
+
+	/*
+	 * If set and if the device is runtime suspended, ask the high-level
+	 * device driver (sd) to force a runtime resume of the device.
+	 */
+	unsigned force_runtime_start_on_system_start:1;
 
 	unsigned removable:1;
 	unsigned changed:1;	/* Data invalid due to media change */
diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h
index 043f8ac65dbf..68e053fe7340 100644
--- a/include/sound/cs35l41.h
+++ b/include/sound/cs35l41.h
@@ -906,6 +906,6 @@ int cs35l41_init_boost(struct device *dev, struct regmap *regmap,
 bool cs35l41_safe_reset(struct regmap *regmap, enum cs35l41_boost_type b_type);
 int cs35l41_mdsync_up(struct regmap *regmap);
 int cs35l41_global_enable(struct device *dev, struct regmap *regmap, enum cs35l41_boost_type b_type,
-			  int enable, bool firmware_running);
+			  int enable, struct cs_dsp *dsp);
 
 #endif /* __CS35L41_H */
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index aee3d40c96f0..7040e7ea80c7 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -846,6 +846,14 @@ struct drm_color_ctm {
 	__u64 matrix[9];
 };
 
+struct drm_color_ctm_3x4 {
+	/*
+	 * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude
+	 * (not two's complement!) format.
+	 */
+	__u64 matrix[12];
+};
+
 struct drm_color_lut {
 	/*
 	 * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and
diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h
index 347c7b62e60e..a512dc4cffd0 100644
--- a/include/uapi/drm/habanalabs_accel.h
+++ b/include/uapi/drm/habanalabs_accel.h
@@ -846,6 +846,7 @@ enum hl_server_type {
 #define HL_INFO_HW_ERR_EVENT			36
 #define HL_INFO_FW_ERR_EVENT			37
 #define HL_INFO_USER_ENGINE_ERR_EVENT		38
+#define HL_INFO_DEV_SIGNED			40
 
 #define HL_INFO_VERSION_MAX_LEN			128
 #define HL_INFO_CARD_NAME_MAX_LEN		16
@@ -1256,6 +1257,7 @@ struct hl_info_dev_memalloc_page_sizes {
 #define SEC_SIGNATURE_BUF_SZ	255	/* (256 - 1) 1 byte used for size */
 #define SEC_PUB_DATA_BUF_SZ	510	/* (512 - 2) 2 bytes used for size */
 #define SEC_CERTIFICATE_BUF_SZ	2046	/* (2048 - 2) 2 bytes used for size */
+#define SEC_DEV_INFO_BUF_SZ	5120
 
 /*
  * struct hl_info_sec_attest - attestation report of the boot
@@ -1290,6 +1292,32 @@ struct hl_info_sec_attest {
 	__u8 pad0[2];
 };
 
+/*
+ * struct hl_info_signed - device information signed by a secured device.
+ * @nonce: number only used once. random number provided by host. this also passed to the quote
+ *         command as a qualifying data.
+ * @pub_data_len: length of the public data (bytes)
+ * @certificate_len: length of the certificate (bytes)
+ * @info_sig_len: length of the attestation signature (bytes)
+ * @public_data: public key info signed info data (outPublic + name + qualifiedName)
+ * @certificate: certificate for the signing key
+ * @info_sig: signature of the info + nonce data.
+ * @dev_info_len: length of device info (bytes)
+ * @dev_info: device info as byte array.
+ */
+struct hl_info_signed {
+	__u32 nonce;
+	__u16 pub_data_len;
+	__u16 certificate_len;
+	__u8 info_sig_len;
+	__u8 public_data[SEC_PUB_DATA_BUF_SZ];
+	__u8 certificate[SEC_CERTIFICATE_BUF_SZ];
+	__u8 info_sig[SEC_SIGNATURE_BUF_SZ];
+	__u16 dev_info_len;
+	__u8 dev_info[SEC_DEV_INFO_BUF_SZ];
+	__u8 pad[2];
+};
+
 /**
  * struct hl_page_fault_info - page fault information.
  * @timestamp: timestamp of page fault.
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 218edb0a96f8..fd4f9574d177 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -693,7 +693,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_FENCE	 44
 
 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
- * user specified bufffers for post-mortem debugging of GPU hangs. See
+ * user-specified buffers for post-mortem debugging of GPU hangs. See
  * EXEC_OBJECT_CAPTURE.
  */
 #define I915_PARAM_HAS_EXEC_CAPTURE	 45
@@ -1606,7 +1606,7 @@ struct drm_i915_gem_busy {
 	 * is accurate.
 	 *
 	 * The returned dword is split into two fields to indicate both
-	 * the engine classess on which the object is being read, and the
+	 * the engine classes on which the object is being read, and the
 	 * engine class on which it is currently being written (if any).
 	 *
 	 * The low word (bits 0:15) indicate if the object is being written
@@ -1815,7 +1815,7 @@ struct drm_i915_gem_madvise {
 	__u32 handle;
 
 	/* Advice: either the buffer will be needed again in the near future,
-	 *         or wont be and could be discarded under memory pressure.
+	 *         or won't be and could be discarded under memory pressure.
 	 */
 	__u32 madv;
 
@@ -3246,7 +3246,7 @@ struct drm_i915_query_topology_info {
  * 	// enough to hold our array of engines. The kernel will fill out the
  * 	// item.length for us, which is the number of bytes we need.
  * 	//
- * 	// Alternatively a large buffer can be allocated straight away enabling
+ *	// Alternatively a large buffer can be allocated straightaway enabling
  * 	// querying in one pass, in which case item.length should contain the
  * 	// length of the provided buffer.
  * 	err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
@@ -3256,7 +3256,7 @@ struct drm_i915_query_topology_info {
  * 	// Now that we allocated the required number of bytes, we call the ioctl
  * 	// again, this time with the data_ptr pointing to our newly allocated
  * 	// blob, which the kernel can then populate with info on all engines.
- * 	item.data_ptr = (uintptr_t)&info,
+ *	item.data_ptr = (uintptr_t)&info;
  *
  * 	err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
  * 	if (err) ...
@@ -3286,7 +3286,7 @@ struct drm_i915_query_topology_info {
 /**
  * struct drm_i915_engine_info
  *
- * Describes one engine and it's capabilities as known to the driver.
+ * Describes one engine and its capabilities as known to the driver.
  */
 struct drm_i915_engine_info {
 	/** @engine: Engine class and instance. */
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 6c34272a13fd..d8a6b3472760 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -86,6 +86,7 @@ struct drm_msm_timespec {
 #define MSM_PARAM_CMDLINE    0x0d  /* WO: override for task cmdline */
 #define MSM_PARAM_VA_START   0x0e  /* RO: start of valid GPU iova range */
 #define MSM_PARAM_VA_SIZE    0x0f  /* RO: size of valid GPU iova range (bytes) */
+#define MSM_PARAM_HIGHEST_BANK_BIT 0x10 /* RO */
 
 /* For backwards compat.  The original support for preemption was based on
  * a single ring per priority level so # of priority levels equals the #
@@ -139,6 +140,8 @@ struct drm_msm_gem_new {
 #define MSM_INFO_GET_NAME	0x03   /* get debug name, returned by pointer */
 #define MSM_INFO_SET_IOVA	0x04   /* set the iova, passed by value */
 #define MSM_INFO_GET_FLAGS	0x05   /* get the MSM_BO_x flags */
+#define MSM_INFO_SET_METADATA	0x06   /* set userspace metadata */
+#define MSM_INFO_GET_METADATA	0x07   /* get userspace metadata */
 
 struct drm_msm_gem_info {
 	__u32 handle;         /* in */
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
new file mode 100644
index 000000000000..9fa3ae324731
--- /dev/null
+++ b/include/uapi/drm/xe_drm.h
@@ -0,0 +1,1347 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _UAPI_XE_DRM_H_
+#define _UAPI_XE_DRM_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Please note that modifications to all structs defined here are
+ * subject to backwards-compatibility constraints.
+ * Sections in this file are organized as follows:
+ *   1. IOCTL definition
+ *   2. Extension definition and helper structs
+ *   3. IOCTL's Query structs in the order of the Query's entries.
+ *   4. The rest of IOCTL structs in the order of IOCTL declaration.
+ */
+
+/**
+ * DOC: Xe Device Block Diagram
+ *
+ * The diagram below represents a high-level simplification of a discrete
+ * GPU supported by the Xe driver. It shows some device components which
+ * are necessary to understand this API, as well as how their relations
+ * to each other. This diagram does not represent real hardware::
+ *
+ *   ┌──────────────────────────────────────────────────────────────────┐
+ *   │ ┌──────────────────────────────────────────────────┐ ┌─────────┐ │
+ *   │ │        ┌───────────────────────┐   ┌─────┐       │ │ ┌─────┐ │ │
+ *   │ │        │         VRAM0         ├───┤ ... │       │ │ │VRAM1│ │ │
+ *   │ │        └───────────┬───────────┘   └─GT1─┘       │ │ └──┬──┘ │ │
+ *   │ │ ┌──────────────────┴───────────────────────────┐ │ │ ┌──┴──┐ │ │
+ *   │ │ │ ┌─────────────────────┐  ┌─────────────────┐ │ │ │ │     │ │ │
+ *   │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │ │ │EU│ │EU│ │EU│ │EU│ │  │ │RCS0 │ │BCS0 │ │ │ │ │ │     │ │ │
+ *   │ │ │ │ └──┘ └──┘ └──┘ └──┘ │  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │ │ │EU│ │EU│ │EU│ │EU│ │  │ │VCS0 │ │VCS1 │ │ │ │ │ │     │ │ │
+ *   │ │ │ │ └──┘ └──┘ └──┘ └──┘ │  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │ │ │EU│ │EU│ │EU│ │EU│ │  │ │VECS0│ │VECS1│ │ │ │ │ │ ... │ │ │
+ *   │ │ │ │ └──┘ └──┘ └──┘ └──┘ │  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │ │ │EU│ │EU│ │EU│ │EU│ │  │ │CCS0 │ │CCS1 │ │ │ │ │ │     │ │ │
+ *   │ │ │ │ └──┘ └──┘ └──┘ └──┘ │  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ └─────────DSS─────────┘  │ ┌─────┐ ┌─────┐ │ │ │ │ │     │ │ │
+ *   │ │ │                          │ │CCS2 │ │CCS3 │ │ │ │ │ │     │ │ │
+ *   │ │ │ ┌─────┐ ┌─────┐ ┌─────┐  │ └─────┘ └─────┘ │ │ │ │ │     │ │ │
+ *   │ │ │ │ ... │ │ ... │ │ ... │  │                 │ │ │ │ │     │ │ │
+ *   │ │ │ └─DSS─┘ └─DSS─┘ └─DSS─┘  └─────Engines─────┘ │ │ │ │     │ │ │
+ *   │ │ └───────────────────────────GT0────────────────┘ │ │ └─GT2─┘ │ │
+ *   │ └────────────────────────────Tile0─────────────────┘ └─ Tile1──┘ │
+ *   └─────────────────────────────Device0───────┬──────────────────────┘
+ *                                               │
+ *                        ───────────────────────┴────────── PCI bus
+ */
+
+/**
+ * DOC: Xe uAPI Overview
+ *
+ * This section aims to describe the Xe's IOCTL entries, its structs, and other
+ * Xe related uAPI such as uevents and PMU (Platform Monitoring Unit) related
+ * entries and usage.
+ *
+ * List of supported IOCTLs:
+ *  - &DRM_IOCTL_XE_DEVICE_QUERY
+ *  - &DRM_IOCTL_XE_GEM_CREATE
+ *  - &DRM_IOCTL_XE_GEM_MMAP_OFFSET
+ *  - &DRM_IOCTL_XE_VM_CREATE
+ *  - &DRM_IOCTL_XE_VM_DESTROY
+ *  - &DRM_IOCTL_XE_VM_BIND
+ *  - &DRM_IOCTL_XE_EXEC_QUEUE_CREATE
+ *  - &DRM_IOCTL_XE_EXEC_QUEUE_DESTROY
+ *  - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
+ *  - &DRM_IOCTL_XE_EXEC
+ *  - &DRM_IOCTL_XE_WAIT_USER_FENCE
+ */
+
+/*
+ * xe specific ioctls.
+ *
+ * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie
+ * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset
+ * against DRM_COMMAND_BASE and should be between [0x0, 0x60).
+ */
+#define DRM_XE_DEVICE_QUERY		0x00
+#define DRM_XE_GEM_CREATE		0x01
+#define DRM_XE_GEM_MMAP_OFFSET		0x02
+#define DRM_XE_VM_CREATE		0x03
+#define DRM_XE_VM_DESTROY		0x04
+#define DRM_XE_VM_BIND			0x05
+#define DRM_XE_EXEC_QUEUE_CREATE	0x06
+#define DRM_XE_EXEC_QUEUE_DESTROY	0x07
+#define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x08
+#define DRM_XE_EXEC			0x09
+#define DRM_XE_WAIT_USER_FENCE		0x0a
+/* Must be kept compact -- no holes */
+
+#define DRM_IOCTL_XE_DEVICE_QUERY		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
+#define DRM_IOCTL_XE_GEM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create)
+#define DRM_IOCTL_XE_GEM_MMAP_OFFSET		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset)
+#define DRM_IOCTL_XE_VM_CREATE			DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create)
+#define DRM_IOCTL_XE_VM_DESTROY			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy)
+#define DRM_IOCTL_XE_VM_BIND			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind)
+#define DRM_IOCTL_XE_EXEC_QUEUE_CREATE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_CREATE, struct drm_xe_exec_queue_create)
+#define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY		DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy)
+#define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
+#define DRM_IOCTL_XE_EXEC			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
+#define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
+
+/**
+ * DOC: Xe IOCTL Extensions
+ *
+ * Before detailing the IOCTLs and its structs, it is important to highlight
+ * that every IOCTL in Xe is extensible.
+ *
+ * Many interfaces need to grow over time. In most cases we can simply
+ * extend the struct and have userspace pass in more data. Another option,
+ * as demonstrated by Vulkan's approach to providing extensions for forward
+ * and backward compatibility, is to use a list of optional structs to
+ * provide those extra details.
+ *
+ * The key advantage to using an extension chain is that it allows us to
+ * redefine the interface more easily than an ever growing struct of
+ * increasing complexity, and for large parts of that interface to be
+ * entirely optional. The downside is more pointer chasing; chasing across
+ * the __user boundary with pointers encapsulated inside u64.
+ *
+ * Example chaining:
+ *
+ * .. code-block:: C
+ *
+ *	struct drm_xe_user_extension ext3 {
+ *		.next_extension = 0, // end
+ *		.name = ...,
+ *	};
+ *	struct drm_xe_user_extension ext2 {
+ *		.next_extension = (uintptr_t)&ext3,
+ *		.name = ...,
+ *	};
+ *	struct drm_xe_user_extension ext1 {
+ *		.next_extension = (uintptr_t)&ext2,
+ *		.name = ...,
+ *	};
+ *
+ * Typically the struct drm_xe_user_extension would be embedded in some uAPI
+ * struct, and in this case we would feed it the head of the chain(i.e ext1),
+ * which would then apply all of the above extensions.
+*/
+
+/**
+ * struct drm_xe_user_extension - Base class for defining a chain of extensions
+ */
+struct drm_xe_user_extension {
+	/**
+	 * @next_extension:
+	 *
+	 * Pointer to the next struct drm_xe_user_extension, or zero if the end.
+	 */
+	__u64 next_extension;
+
+	/**
+	 * @name: Name of the extension.
+	 *
+	 * Note that the name here is just some integer.
+	 *
+	 * Also note that the name space for this is not global for the whole
+	 * driver, but rather its scope/meaning is limited to the specific piece
+	 * of uAPI which has embedded the struct drm_xe_user_extension.
+	 */
+	__u32 name;
+
+	/**
+	 * @pad: MBZ
+	 *
+	 * All undefined bits must be zero.
+	 */
+	__u32 pad;
+};
+
+/**
+ * struct drm_xe_ext_set_property - Generic set property extension
+ *
+ * A generic struct that allows any of the Xe's IOCTL to be extended
+ * with a set_property operation.
+ */
+struct drm_xe_ext_set_property {
+	/** @base: base user extension */
+	struct drm_xe_user_extension base;
+
+	/** @property: property to set */
+	__u32 property;
+
+	/** @pad: MBZ */
+	__u32 pad;
+
+	/** @value: property value */
+	__u64 value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_engine_class_instance - instance of an engine class
+ *
+ * It is returned as part of the @drm_xe_engine, but it also is used as
+ * the input of engine selection for both @drm_xe_exec_queue_create and
+ * @drm_xe_query_engine_cycles
+ *
+ * The @engine_class can be:
+ *  - %DRM_XE_ENGINE_CLASS_RENDER
+ *  - %DRM_XE_ENGINE_CLASS_COPY
+ *  - %DRM_XE_ENGINE_CLASS_VIDEO_DECODE
+ *  - %DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE
+ *  - %DRM_XE_ENGINE_CLASS_COMPUTE
+ *  - %DRM_XE_ENGINE_CLASS_VM_BIND - Kernel only classes (not actual
+ *    hardware engine class). Used for creating ordered queues of VM
+ *    bind operations.
+ */
+struct drm_xe_engine_class_instance {
+#define DRM_XE_ENGINE_CLASS_RENDER		0
+#define DRM_XE_ENGINE_CLASS_COPY		1
+#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE	2
+#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE	3
+#define DRM_XE_ENGINE_CLASS_COMPUTE		4
+#define DRM_XE_ENGINE_CLASS_VM_BIND		5
+	/** @engine_class: engine class id */
+	__u16 engine_class;
+	/** @engine_instance: engine instance id */
+	__u16 engine_instance;
+	/** @gt_id: Unique ID of this GT within the PCI Device */
+	__u16 gt_id;
+	/** @pad: MBZ */
+	__u16 pad;
+};
+
+/**
+ * struct drm_xe_engine - describe hardware engine
+ */
+struct drm_xe_engine {
+	/** @instance: The @drm_xe_engine_class_instance */
+	struct drm_xe_engine_class_instance instance;
+
+	/** @reserved: Reserved */
+	__u64 reserved[3];
+};
+
+/**
+ * struct drm_xe_query_engines - describe engines
+ *
+ * If a query is made with a struct @drm_xe_device_query where .query
+ * is equal to %DRM_XE_DEVICE_QUERY_ENGINES, then the reply uses an array of
+ * struct @drm_xe_query_engines in .data.
+ */
+struct drm_xe_query_engines {
+	/** @num_engines: number of engines returned in @engines */
+	__u32 num_engines;
+	/** @pad: MBZ */
+	__u32 pad;
+	/** @engines: The returned engines for this device */
+	struct drm_xe_engine engines[];
+};
+
+/**
+ * enum drm_xe_memory_class - Supported memory classes.
+ */
+enum drm_xe_memory_class {
+	/** @DRM_XE_MEM_REGION_CLASS_SYSMEM: Represents system memory. */
+	DRM_XE_MEM_REGION_CLASS_SYSMEM = 0,
+	/**
+	 * @DRM_XE_MEM_REGION_CLASS_VRAM: On discrete platforms, this
+	 * represents the memory that is local to the device, which we
+	 * call VRAM. Not valid on integrated platforms.
+	 */
+	DRM_XE_MEM_REGION_CLASS_VRAM
+};
+
+/**
+ * struct drm_xe_mem_region - Describes some region as known to
+ * the driver.
+ */
+struct drm_xe_mem_region {
+	/**
+	 * @mem_class: The memory class describing this region.
+	 *
+	 * See enum drm_xe_memory_class for supported values.
+	 */
+	__u16 mem_class;
+	/**
+	 * @instance: The unique ID for this region, which serves as the
+	 * index in the placement bitmask used as argument for
+	 * &DRM_IOCTL_XE_GEM_CREATE
+	 */
+	__u16 instance;
+	/**
+	 * @min_page_size: Min page-size in bytes for this region.
+	 *
+	 * When the kernel allocates memory for this region, the
+	 * underlying pages will be at least @min_page_size in size.
+	 * Buffer objects with an allowable placement in this region must be
+	 * created with a size aligned to this value.
+	 * GPU virtual address mappings of (parts of) buffer objects that
+	 * may be placed in this region must also have their GPU virtual
+	 * address and range aligned to this value.
+	 * Affected IOCTLS will return %-EINVAL if alignment restrictions are
+	 * not met.
+	 */
+	__u32 min_page_size;
+	/**
+	 * @total_size: The usable size in bytes for this region.
+	 */
+	__u64 total_size;
+	/**
+	 * @used: Estimate of the memory used in bytes for this region.
+	 *
+	 * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable
+	 * accounting.  Without this the value here will always equal
+	 * zero.
+	 */
+	__u64 used;
+	/**
+	 * @cpu_visible_size: How much of this region can be CPU
+	 * accessed, in bytes.
+	 *
+	 * This will always be <= @total_size, and the remainder (if
+	 * any) will not be CPU accessible. If the CPU accessible part
+	 * is smaller than @total_size then this is referred to as a
+	 * small BAR system.
+	 *
+	 * On systems without small BAR (full BAR), the probed_size will
+	 * always equal the @total_size, since all of it will be CPU
+	 * accessible.
+	 *
+	 * Note this is only tracked for DRM_XE_MEM_REGION_CLASS_VRAM
+	 * regions (for other types the value here will always equal
+	 * zero).
+	 */
+	__u64 cpu_visible_size;
+	/**
+	 * @cpu_visible_used: Estimate of CPU visible memory used, in
+	 * bytes.
+	 *
+	 * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable
+	 * accounting. Without this the value here will always equal
+	 * zero.  Note this is only currently tracked for
+	 * DRM_XE_MEM_REGION_CLASS_VRAM regions (for other types the value
+	 * here will always be zero).
+	 */
+	__u64 cpu_visible_used;
+	/** @reserved: Reserved */
+	__u64 reserved[6];
+};
+
+/**
+ * struct drm_xe_query_mem_regions - describe memory regions
+ *
+ * If a query is made with a struct drm_xe_device_query where .query
+ * is equal to DRM_XE_DEVICE_QUERY_MEM_REGIONS, then the reply uses
+ * struct drm_xe_query_mem_regions in .data.
+ */
+struct drm_xe_query_mem_regions {
+	/** @num_mem_regions: number of memory regions returned in @mem_regions */
+	__u32 num_mem_regions;
+	/** @pad: MBZ */
+	__u32 pad;
+	/** @mem_regions: The returned memory regions for this device */
+	struct drm_xe_mem_region mem_regions[];
+};
+
+/**
+ * struct drm_xe_query_config - describe the device configuration
+ *
+ * If a query is made with a struct drm_xe_device_query where .query
+ * is equal to DRM_XE_DEVICE_QUERY_CONFIG, then the reply uses
+ * struct drm_xe_query_config in .data.
+ *
+ * The index in @info can be:
+ *  - %DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID - Device ID (lower 16 bits)
+ *    and the device revision (next 8 bits)
+ *  - %DRM_XE_QUERY_CONFIG_FLAGS - Flags describing the device
+ *    configuration, see list below
+ *
+ *    - %DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM - Flag is set if the device
+ *      has usable VRAM
+ *  - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment
+ *    required by this device, typically SZ_4K or SZ_64K
+ *  - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address
+ *  - %DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY - Value of the highest
+ *    available exec queue priority
+ */
+struct drm_xe_query_config {
+	/** @num_params: number of parameters returned in info */
+	__u32 num_params;
+
+	/** @pad: MBZ */
+	__u32 pad;
+
+#define DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID	0
+#define DRM_XE_QUERY_CONFIG_FLAGS			1
+	#define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM	(1 << 0)
+#define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT		2
+#define DRM_XE_QUERY_CONFIG_VA_BITS			3
+#define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY	4
+	/** @info: array of elements containing the config info */
+	__u64 info[];
+};
+
+/**
+ * struct drm_xe_gt - describe an individual GT.
+ *
+ * To be used with drm_xe_query_gt_list, which will return a list with all the
+ * existing GT individual descriptions.
+ * Graphics Technology (GT) is a subset of a GPU/tile that is responsible for
+ * implementing graphics and/or media operations.
+ *
+ * The index in @type can be:
+ *  - %DRM_XE_QUERY_GT_TYPE_MAIN
+ *  - %DRM_XE_QUERY_GT_TYPE_MEDIA
+ */
+struct drm_xe_gt {
+#define DRM_XE_QUERY_GT_TYPE_MAIN		0
+#define DRM_XE_QUERY_GT_TYPE_MEDIA		1
+	/** @type: GT type: Main or Media */
+	__u16 type;
+	/** @tile_id: Tile ID where this GT lives (Information only) */
+	__u16 tile_id;
+	/** @gt_id: Unique ID of this GT within the PCI Device */
+	__u16 gt_id;
+	/** @pad: MBZ */
+	__u16 pad[3];
+	/** @reference_clock: A clock frequency for timestamp */
+	__u32 reference_clock;
+	/**
+	 * @near_mem_regions: Bit mask of instances from
+	 * drm_xe_query_mem_regions that are nearest to the current engines
+	 * of this GT.
+	 * Each index in this mask refers directly to the struct
+	 * drm_xe_query_mem_regions' instance, no assumptions should
+	 * be made about order. The type of each region is described
+	 * by struct drm_xe_query_mem_regions' mem_class.
+	 */
+	__u64 near_mem_regions;
+	/**
+	 * @far_mem_regions: Bit mask of instances from
+	 * drm_xe_query_mem_regions that are far from the engines of this GT.
+	 * In general, they have extra indirections when compared to the
+	 * @near_mem_regions. For a discrete device this could mean system
+	 * memory and memory living in a different tile.
+	 * Each index in this mask refers directly to the struct
+	 * drm_xe_query_mem_regions' instance, no assumptions should
+	 * be made about order. The type of each region is described
+	 * by struct drm_xe_query_mem_regions' mem_class.
+	 */
+	__u64 far_mem_regions;
+	/** @reserved: Reserved */
+	__u64 reserved[8];
+};
+
+/**
+ * struct drm_xe_query_gt_list - A list with GT description items.
+ *
+ * If a query is made with a struct drm_xe_device_query where .query
+ * is equal to DRM_XE_DEVICE_QUERY_GT_LIST, then the reply uses struct
+ * drm_xe_query_gt_list in .data.
+ */
+struct drm_xe_query_gt_list {
+	/** @num_gt: number of GT items returned in gt_list */
+	__u32 num_gt;
+	/** @pad: MBZ */
+	__u32 pad;
+	/** @gt_list: The GT list returned for this device */
+	struct drm_xe_gt gt_list[];
+};
+
+/**
+ * struct drm_xe_query_topology_mask - describe the topology mask of a GT
+ *
+ * This is the hardware topology which reflects the internal physical
+ * structure of the GPU.
+ *
+ * If a query is made with a struct drm_xe_device_query where .query
+ * is equal to DRM_XE_DEVICE_QUERY_GT_TOPOLOGY, then the reply uses
+ * struct drm_xe_query_topology_mask in .data.
+ *
+ * The @type can be:
+ *  - %DRM_XE_TOPO_DSS_GEOMETRY - To query the mask of Dual Sub Slices
+ *    (DSS) available for geometry operations. For example a query response
+ *    containing the following in mask:
+ *    ``DSS_GEOMETRY    ff ff ff ff 00 00 00 00``
+ *    means 32 DSS are available for geometry.
+ *  - %DRM_XE_TOPO_DSS_COMPUTE - To query the mask of Dual Sub Slices
+ *    (DSS) available for compute operations. For example a query response
+ *    containing the following in mask:
+ *    ``DSS_COMPUTE    ff ff ff ff 00 00 00 00``
+ *    means 32 DSS are available for compute.
+ *  - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU)
+ *    available per Dual Sub Slices (DSS). For example a query response
+ *    containing the following in mask:
+ *    ``EU_PER_DSS    ff ff 00 00 00 00 00 00``
+ *    means each DSS has 16 EU.
+ */
+struct drm_xe_query_topology_mask {
+	/** @gt_id: GT ID the mask is associated with */
+	__u16 gt_id;
+
+#define DRM_XE_TOPO_DSS_GEOMETRY	(1 << 0)
+#define DRM_XE_TOPO_DSS_COMPUTE		(1 << 1)
+#define DRM_XE_TOPO_EU_PER_DSS		(1 << 2)
+	/** @type: type of mask */
+	__u16 type;
+
+	/** @num_bytes: number of bytes in requested mask */
+	__u32 num_bytes;
+
+	/** @mask: little-endian mask of @num_bytes */
+	__u8 mask[];
+};
+
+/**
+ * struct drm_xe_query_engine_cycles - correlate CPU and GPU timestamps
+ *
+ * If a query is made with a struct drm_xe_device_query where .query is equal to
+ * DRM_XE_DEVICE_QUERY_ENGINE_CYCLES, then the reply uses struct drm_xe_query_engine_cycles
+ * in .data. struct drm_xe_query_engine_cycles is allocated by the user and
+ * .data points to this allocated structure.
+ *
+ * The query returns the engine cycles, which along with GT's @reference_clock,
+ * can be used to calculate the engine timestamp. In addition the
+ * query returns a set of cpu timestamps that indicate when the command
+ * streamer cycle count was captured.
+ */
+struct drm_xe_query_engine_cycles {
+	/**
+	 * @eci: This is input by the user and is the engine for which command
+	 * streamer cycles is queried.
+	 */
+	struct drm_xe_engine_class_instance eci;
+
+	/**
+	 * @clockid: This is input by the user and is the reference clock id for
+	 * CPU timestamp. For definition, see clock_gettime(2) and
+	 * perf_event_open(2). Supported clock ids are CLOCK_MONOTONIC,
+	 * CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME, CLOCK_TAI.
+	 */
+	__s32 clockid;
+
+	/** @width: Width of the engine cycle counter in bits. */
+	__u32 width;
+
+	/**
+	 * @engine_cycles: Engine cycles as read from its register
+	 * at 0x358 offset.
+	 */
+	__u64 engine_cycles;
+
+	/**
+	 * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before
+	 * reading the engine_cycles register using the reference clockid set by the
+	 * user.
+	 */
+	__u64 cpu_timestamp;
+
+	/**
+	 * @cpu_delta: Time delta in ns captured around reading the lower dword
+	 * of the engine_cycles register.
+	 */
+	__u64 cpu_delta;
+};
+
+/**
+ * struct drm_xe_device_query - Input of &DRM_IOCTL_XE_DEVICE_QUERY - main
+ * structure to query device information
+ *
+ * The user selects the type of data to query among DRM_XE_DEVICE_QUERY_*
+ * and sets the value in the query member. This determines the type of
+ * the structure provided by the driver in data, among struct drm_xe_query_*.
+ *
+ * The @query can be:
+ *  - %DRM_XE_DEVICE_QUERY_ENGINES
+ *  - %DRM_XE_DEVICE_QUERY_MEM_REGIONS
+ *  - %DRM_XE_DEVICE_QUERY_CONFIG
+ *  - %DRM_XE_DEVICE_QUERY_GT_LIST
+ *  - %DRM_XE_DEVICE_QUERY_HWCONFIG - Query type to retrieve the hardware
+ *    configuration of the device such as information on slices, memory,
+ *    caches, and so on. It is provided as a table of key / value
+ *    attributes.
+ *  - %DRM_XE_DEVICE_QUERY_GT_TOPOLOGY
+ *  - %DRM_XE_DEVICE_QUERY_ENGINE_CYCLES
+ *
+ * If size is set to 0, the driver fills it with the required size for
+ * the requested type of data to query. If size is equal to the required
+ * size, the queried information is copied into data. If size is set to
+ * a value different from 0 and different from the required size, the
+ * IOCTL call returns -EINVAL.
+ *
+ * For example the following code snippet allows retrieving and printing
+ * information about the device engines with DRM_XE_DEVICE_QUERY_ENGINES:
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_xe_query_engines *engines;
+ *     struct drm_xe_device_query query = {
+ *         .extensions = 0,
+ *         .query = DRM_XE_DEVICE_QUERY_ENGINES,
+ *         .size = 0,
+ *         .data = 0,
+ *     };
+ *     ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
+ *     engines = malloc(query.size);
+ *     query.data = (uintptr_t)engines;
+ *     ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
+ *     for (int i = 0; i < engines->num_engines; i++) {
+ *         printf("Engine %d: %s\n", i,
+ *             engines->engines[i].instance.engine_class ==
+ *                 DRM_XE_ENGINE_CLASS_RENDER ? "RENDER":
+ *             engines->engines[i].instance.engine_class ==
+ *                 DRM_XE_ENGINE_CLASS_COPY ? "COPY":
+ *             engines->engines[i].instance.engine_class ==
+ *                 DRM_XE_ENGINE_CLASS_VIDEO_DECODE ? "VIDEO_DECODE":
+ *             engines->engines[i].instance.engine_class ==
+ *                 DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE ? "VIDEO_ENHANCE":
+ *             engines->engines[i].instance.engine_class ==
+ *                 DRM_XE_ENGINE_CLASS_COMPUTE ? "COMPUTE":
+ *             "UNKNOWN");
+ *     }
+ *     free(engines);
+ */
+struct drm_xe_device_query {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+#define DRM_XE_DEVICE_QUERY_ENGINES		0
+#define DRM_XE_DEVICE_QUERY_MEM_REGIONS		1
+#define DRM_XE_DEVICE_QUERY_CONFIG		2
+#define DRM_XE_DEVICE_QUERY_GT_LIST		3
+#define DRM_XE_DEVICE_QUERY_HWCONFIG		4
+#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY		5
+#define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES	6
+	/** @query: The type of data to query */
+	__u32 query;
+
+	/** @size: Size of the queried data */
+	__u32 size;
+
+	/** @data: Queried data is placed here */
+	__u64 data;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_gem_create - Input of &DRM_IOCTL_XE_GEM_CREATE - A structure for
+ * gem creation
+ *
+ * The @flags can be:
+ *  - %DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING
+ *  - %DRM_XE_GEM_CREATE_FLAG_SCANOUT
+ *  - %DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM - When using VRAM as a
+ *    possible placement, ensure that the corresponding VRAM allocation
+ *    will always use the CPU accessible part of VRAM. This is important
+ *    for small-bar systems (on full-bar systems this gets turned into a
+ *    noop).
+ *    Note1: System memory can be used as an extra placement if the kernel
+ *    should spill the allocation to system memory, if space can't be made
+ *    available in the CPU accessible part of VRAM (giving the same
+ *    behaviour as the i915 interface, see
+ *    I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS).
+ *    Note2: For clear-color CCS surfaces the kernel needs to read the
+ *    clear-color value stored in the buffer, and on discrete platforms we
+ *    need to use VRAM for display surfaces, therefore the kernel requires
+ *    setting this flag for such objects, otherwise an error is thrown on
+ *    small-bar systems.
+ *
+ * @cpu_caching supports the following values:
+ *  - %DRM_XE_GEM_CPU_CACHING_WB - Allocate the pages with write-back
+ *    caching. On iGPU this can't be used for scanout surfaces. Currently
+ *    not allowed for objects placed in VRAM.
+ *  - %DRM_XE_GEM_CPU_CACHING_WC - Allocate the pages as write-combined. This
+ *    is uncached. Scanout surfaces should likely use this. All objects
+ *    that can be placed in VRAM must use this.
+ */
+struct drm_xe_gem_create {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/**
+	 * @size: Size of the object to be created, must match region
+	 * (system or vram) minimum alignment (&min_page_size).
+	 */
+	__u64 size;
+
+	/**
+	 * @placement: A mask of memory instances of where BO can be placed.
+	 * Each index in this mask refers directly to the struct
+	 * drm_xe_query_mem_regions' instance, no assumptions should
+	 * be made about order. The type of each region is described
+	 * by struct drm_xe_query_mem_regions' mem_class.
+	 */
+	__u32 placement;
+
+#define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING		(1 << 0)
+#define DRM_XE_GEM_CREATE_FLAG_SCANOUT			(1 << 1)
+#define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM	(1 << 2)
+	/**
+	 * @flags: Flags, currently a mask of memory instances of where BO can
+	 * be placed
+	 */
+	__u32 flags;
+
+	/**
+	 * @vm_id: Attached VM, if any
+	 *
+	 * If a VM is specified, this BO must:
+	 *
+	 *  1. Only ever be bound to that VM.
+	 *  2. Cannot be exported as a PRIME fd.
+	 */
+	__u32 vm_id;
+
+	/**
+	 * @handle: Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+
+#define DRM_XE_GEM_CPU_CACHING_WB                      1
+#define DRM_XE_GEM_CPU_CACHING_WC                      2
+	/**
+	 * @cpu_caching: The CPU caching mode to select for this object. If
+	 * mmaping the object the mode selected here will also be used.
+	 */
+	__u16 cpu_caching;
+	/** @pad: MBZ */
+	__u16 pad[3];
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_gem_mmap_offset - Input of &DRM_IOCTL_XE_GEM_MMAP_OFFSET
+ */
+struct drm_xe_gem_mmap_offset {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @handle: Handle for the object being mapped. */
+	__u32 handle;
+
+	/** @flags: Must be zero */
+	__u32 flags;
+
+	/** @offset: The fake offset to use for subsequent mmap call */
+	__u64 offset;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_vm_create - Input of &DRM_IOCTL_XE_VM_CREATE
+ *
+ * The @flags can be:
+ *  - %DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE
+ *  - %DRM_XE_VM_CREATE_FLAG_LR_MODE - An LR, or Long Running VM accepts
+ *    exec submissions to its exec_queues that don't have an upper time
+ *    limit on the job execution time. But exec submissions to these
+ *    don't allow any of the flags DRM_XE_SYNC_FLAG_SYNCOBJ,
+ *    DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ, DRM_XE_SYNC_FLAG_DMA_BUF,
+ *    used as out-syncobjs, that is, together with DRM_XE_SYNC_FLAG_SIGNAL.
+ *    LR VMs can be created in recoverable page-fault mode using
+ *    DRM_XE_VM_CREATE_FLAG_FAULT_MODE, if the device supports it.
+ *    If that flag is omitted, the UMD can not rely on the slightly
+ *    different per-VM overcommit semantics that are enabled by
+ *    DRM_XE_VM_CREATE_FLAG_FAULT_MODE (see below), but KMD may
+ *    still enable recoverable pagefaults if supported by the device.
+ *  - %DRM_XE_VM_CREATE_FLAG_FAULT_MODE - Requires also
+ *    DRM_XE_VM_CREATE_FLAG_LR_MODE. It allows memory to be allocated on
+ *    demand when accessed, and also allows per-VM overcommit of memory.
+ *    The xe driver internally uses recoverable pagefaults to implement
+ *    this.
+ */
+struct drm_xe_vm_create {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+#define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE	(1 << 0)
+#define DRM_XE_VM_CREATE_FLAG_LR_MODE	        (1 << 1)
+#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE	(1 << 2)
+	/** @flags: Flags */
+	__u32 flags;
+
+	/** @vm_id: Returned VM ID */
+	__u32 vm_id;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_vm_destroy - Input of &DRM_IOCTL_XE_VM_DESTROY
+ */
+struct drm_xe_vm_destroy {
+	/** @vm_id: VM ID */
+	__u32 vm_id;
+
+	/** @pad: MBZ */
+	__u32 pad;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_vm_bind_op - run bind operations
+ *
+ * The @op can be:
+ *  - %DRM_XE_VM_BIND_OP_MAP
+ *  - %DRM_XE_VM_BIND_OP_UNMAP
+ *  - %DRM_XE_VM_BIND_OP_MAP_USERPTR
+ *  - %DRM_XE_VM_BIND_OP_UNMAP_ALL
+ *  - %DRM_XE_VM_BIND_OP_PREFETCH
+ *
+ * and the @flags can be:
+ *  - %DRM_XE_VM_BIND_FLAG_READONLY
+ *  - %DRM_XE_VM_BIND_FLAG_ASYNC
+ *  - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the
+ *    MAP operation immediately rather than deferring the MAP to the page
+ *    fault handler.
+ *  - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page
+ *    tables are setup with a special bit which indicates writes are
+ *    dropped and all reads return zero. In the future, the NULL flags
+ *    will only be valid for DRM_XE_VM_BIND_OP_MAP operations, the BO
+ *    handle MBZ, and the BO offset MBZ. This flag is intended to
+ *    implement VK sparse bindings.
+ */
+struct drm_xe_vm_bind_op {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/**
+	 * @obj: GEM object to operate on, MBZ for MAP_USERPTR, MBZ for UNMAP
+	 */
+	__u32 obj;
+
+	/**
+	 * @pat_index: The platform defined @pat_index to use for this mapping.
+	 * The index basically maps to some predefined memory attributes,
+	 * including things like caching, coherency, compression etc.  The exact
+	 * meaning of the pat_index is platform specific and defined in the
+	 * Bspec and PRMs.  When the KMD sets up the binding the index here is
+	 * encoded into the ppGTT PTE.
+	 *
+	 * For coherency the @pat_index needs to be at least 1way coherent when
+	 * drm_xe_gem_create.cpu_caching is DRM_XE_GEM_CPU_CACHING_WB. The KMD
+	 * will extract the coherency mode from the @pat_index and reject if
+	 * there is a mismatch (see note below for pre-MTL platforms).
+	 *
+	 * Note: On pre-MTL platforms there is only a caching mode and no
+	 * explicit coherency mode, but on such hardware there is always a
+	 * shared-LLC (or is dgpu) so all GT memory accesses are coherent with
+	 * CPU caches even with the caching mode set as uncached.  It's only the
+	 * display engine that is incoherent (on dgpu it must be in VRAM which
+	 * is always mapped as WC on the CPU). However to keep the uapi somewhat
+	 * consistent with newer platforms the KMD groups the different cache
+	 * levels into the following coherency buckets on all pre-MTL platforms:
+	 *
+	 *	ppGTT UC -> COH_NONE
+	 *	ppGTT WC -> COH_NONE
+	 *	ppGTT WT -> COH_NONE
+	 *	ppGTT WB -> COH_AT_LEAST_1WAY
+	 *
+	 * In practice UC/WC/WT should only ever used for scanout surfaces on
+	 * such platforms (or perhaps in general for dma-buf if shared with
+	 * another device) since it is only the display engine that is actually
+	 * incoherent.  Everything else should typically use WB given that we
+	 * have a shared-LLC.  On MTL+ this completely changes and the HW
+	 * defines the coherency mode as part of the @pat_index, where
+	 * incoherent GT access is possible.
+	 *
+	 * Note: For userptr and externally imported dma-buf the kernel expects
+	 * either 1WAY or 2WAY for the @pat_index.
+	 *
+	 * For DRM_XE_VM_BIND_FLAG_NULL bindings there are no KMD restrictions
+	 * on the @pat_index. For such mappings there is no actual memory being
+	 * mapped (the address in the PTE is invalid), so the various PAT memory
+	 * attributes likely do not apply.  Simply leaving as zero is one
+	 * option (still a valid pat_index).
+	 */
+	__u16 pat_index;
+
+	/** @pad: MBZ */
+	__u16 pad;
+
+	union {
+		/**
+		 * @obj_offset: Offset into the object, MBZ for CLEAR_RANGE,
+		 * ignored for unbind
+		 */
+		__u64 obj_offset;
+
+		/** @userptr: user pointer to bind on */
+		__u64 userptr;
+	};
+
+	/**
+	 * @range: Number of bytes from the object to bind to addr, MBZ for UNMAP_ALL
+	 */
+	__u64 range;
+
+	/** @addr: Address to operate on, MBZ for UNMAP_ALL */
+	__u64 addr;
+
+#define DRM_XE_VM_BIND_OP_MAP		0x0
+#define DRM_XE_VM_BIND_OP_UNMAP		0x1
+#define DRM_XE_VM_BIND_OP_MAP_USERPTR	0x2
+#define DRM_XE_VM_BIND_OP_UNMAP_ALL	0x3
+#define DRM_XE_VM_BIND_OP_PREFETCH	0x4
+	/** @op: Bind operation to perform */
+	__u32 op;
+
+#define DRM_XE_VM_BIND_FLAG_READONLY	(1 << 0)
+#define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(1 << 1)
+#define DRM_XE_VM_BIND_FLAG_NULL	(1 << 2)
+	/** @flags: Bind flags */
+	__u32 flags;
+
+	/**
+	 * @prefetch_mem_region_instance: Memory region to prefetch VMA to.
+	 * It is a region instance, not a mask.
+	 * To be used only with %DRM_XE_VM_BIND_OP_PREFETCH operation.
+	 */
+	__u32 prefetch_mem_region_instance;
+
+	/** @pad2: MBZ */
+	__u32 pad2;
+
+	/** @reserved: Reserved */
+	__u64 reserved[3];
+};
+
+/**
+ * struct drm_xe_vm_bind - Input of &DRM_IOCTL_XE_VM_BIND
+ *
+ * Below is an example of a minimal use of @drm_xe_vm_bind to
+ * asynchronously bind the buffer `data` at address `BIND_ADDRESS` to
+ * illustrate `userptr`. It can be synchronized by using the example
+ * provided for @drm_xe_sync.
+ *
+ * .. code-block:: C
+ *
+ *     data = aligned_alloc(ALIGNMENT, BO_SIZE);
+ *     struct drm_xe_vm_bind bind = {
+ *         .vm_id = vm,
+ *         .num_binds = 1,
+ *         .bind.obj = 0,
+ *         .bind.obj_offset = to_user_pointer(data),
+ *         .bind.range = BO_SIZE,
+ *         .bind.addr = BIND_ADDRESS,
+ *         .bind.op = DRM_XE_VM_BIND_OP_MAP_USERPTR,
+ *         .bind.flags = 0,
+ *         .num_syncs = 1,
+ *         .syncs = &sync,
+ *         .exec_queue_id = 0,
+ *     };
+ *     ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
+ *
+ */
+struct drm_xe_vm_bind {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @vm_id: The ID of the VM to bind to */
+	__u32 vm_id;
+
+	/**
+	 * @exec_queue_id: exec_queue_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND
+	 * and exec queue must have same vm_id. If zero, the default VM bind engine
+	 * is used.
+	 */
+	__u32 exec_queue_id;
+
+	/** @pad: MBZ */
+	__u32 pad;
+
+	/** @num_binds: number of binds in this IOCTL */
+	__u32 num_binds;
+
+	union {
+		/** @bind: used if num_binds == 1 */
+		struct drm_xe_vm_bind_op bind;
+
+		/**
+		 * @vector_of_binds: userptr to array of struct
+		 * drm_xe_vm_bind_op if num_binds > 1
+		 */
+		__u64 vector_of_binds;
+	};
+
+	/** @pad2: MBZ */
+	__u32 pad2;
+
+	/** @num_syncs: amount of syncs to wait on */
+	__u32 num_syncs;
+
+	/** @syncs: pointer to struct drm_xe_sync array */
+	__u64 syncs;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_exec_queue_create - Input of &DRM_IOCTL_XE_EXEC_QUEUE_CREATE
+ *
+ * The example below shows how to use @drm_xe_exec_queue_create to create
+ * a simple exec_queue (no parallel submission) of class
+ * &DRM_XE_ENGINE_CLASS_RENDER.
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_xe_engine_class_instance instance = {
+ *         .engine_class = DRM_XE_ENGINE_CLASS_RENDER,
+ *     };
+ *     struct drm_xe_exec_queue_create exec_queue_create = {
+ *          .extensions = 0,
+ *          .vm_id = vm,
+ *          .num_bb_per_exec = 1,
+ *          .num_eng_per_bb = 1,
+ *          .instances = to_user_pointer(&instance),
+ *     };
+ *     ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &exec_queue_create);
+ *
+ */
+struct drm_xe_exec_queue_create {
+#define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY		0
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY		0
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE		1
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT	2
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE		3
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT		4
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER		5
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY		6
+#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY	7
+/* Monitor 128KB contiguous region with 4K sub-granularity */
+#define     DRM_XE_ACC_GRANULARITY_128K				0
+/* Monitor 2MB contiguous region with 64KB sub-granularity */
+#define     DRM_XE_ACC_GRANULARITY_2M				1
+/* Monitor 16MB contiguous region with 512KB sub-granularity */
+#define     DRM_XE_ACC_GRANULARITY_16M				2
+/* Monitor 64MB contiguous region with 2M sub-granularity */
+#define     DRM_XE_ACC_GRANULARITY_64M				3
+
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @width: submission width (number BB per exec) for this exec queue */
+	__u16 width;
+
+	/** @num_placements: number of valid placements for this exec queue */
+	__u16 num_placements;
+
+	/** @vm_id: VM to use for this exec queue */
+	__u32 vm_id;
+
+	/** @flags: MBZ */
+	__u32 flags;
+
+	/** @exec_queue_id: Returned exec queue ID */
+	__u32 exec_queue_id;
+
+	/**
+	 * @instances: user pointer to a 2-d array of struct
+	 * drm_xe_engine_class_instance
+	 *
+	 * length = width (i) * num_placements (j)
+	 * index = j + i * width
+	 */
+	__u64 instances;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_exec_queue_destroy - Input of &DRM_IOCTL_XE_EXEC_QUEUE_DESTROY
+ */
+struct drm_xe_exec_queue_destroy {
+	/** @exec_queue_id: Exec queue ID */
+	__u32 exec_queue_id;
+
+	/** @pad: MBZ */
+	__u32 pad;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_exec_queue_get_property - Input of &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
+ *
+ * The @property can be:
+ *  - %DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN
+ */
+struct drm_xe_exec_queue_get_property {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @exec_queue_id: Exec queue ID */
+	__u32 exec_queue_id;
+
+#define DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN	0
+	/** @property: property to get */
+	__u32 property;
+
+	/** @value: property value */
+	__u64 value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_sync - sync object
+ *
+ * The @type can be:
+ *  - %DRM_XE_SYNC_TYPE_SYNCOBJ
+ *  - %DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ
+ *  - %DRM_XE_SYNC_TYPE_USER_FENCE
+ *
+ * and the @flags can be:
+ *  - %DRM_XE_SYNC_FLAG_SIGNAL
+ *
+ * A minimal use of @drm_xe_sync looks like this:
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_xe_sync sync = {
+ *         .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+ *         .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+ *     };
+ *     struct drm_syncobj_create syncobj_create = { 0 };
+ *     ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &syncobj_create);
+ *     sync.handle = syncobj_create.handle;
+ *         ...
+ *         use of &sync in drm_xe_exec or drm_xe_vm_bind
+ *         ...
+ *     struct drm_syncobj_wait wait = {
+ *         .handles = &sync.handle,
+ *         .timeout_nsec = INT64_MAX,
+ *         .count_handles = 1,
+ *         .flags = 0,
+ *         .first_signaled = 0,
+ *         .pad = 0,
+ *     };
+ *     ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
+ */
+struct drm_xe_sync {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+#define DRM_XE_SYNC_TYPE_SYNCOBJ		0x0
+#define DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ	0x1
+#define DRM_XE_SYNC_TYPE_USER_FENCE		0x2
+	/** @type: Type of the this sync object */
+	__u32 type;
+
+#define DRM_XE_SYNC_FLAG_SIGNAL	(1 << 0)
+	/** @flags: Sync Flags */
+	__u32 flags;
+
+	union {
+		/** @handle: Handle for the object */
+		__u32 handle;
+
+		/**
+		 * @addr: Address of user fence. When sync is passed in via exec
+		 * IOCTL this is a GPU address in the VM. When sync passed in via
+		 * VM bind IOCTL this is a user pointer. In either case, it is
+		 * the users responsibility that this address is present and
+		 * mapped when the user fence is signalled. Must be qword
+		 * aligned.
+		 */
+		__u64 addr;
+	};
+
+	/**
+	 * @timeline_value: Input for the timeline sync object. Needs to be
+	 * different than 0 when used with %DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ.
+	 */
+	__u64 timeline_value;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_exec - Input of &DRM_IOCTL_XE_EXEC
+ *
+ * This is an example to use @drm_xe_exec for execution of the object
+ * at BIND_ADDRESS (see example in @drm_xe_vm_bind) by an exec_queue
+ * (see example in @drm_xe_exec_queue_create). It can be synchronized
+ * by using the example provided for @drm_xe_sync.
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_xe_exec exec = {
+ *         .exec_queue_id = exec_queue,
+ *         .syncs = &sync,
+ *         .num_syncs = 1,
+ *         .address = BIND_ADDRESS,
+ *         .num_batch_buffer = 1,
+ *     };
+ *     ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);
+ *
+ */
+struct drm_xe_exec {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @exec_queue_id: Exec queue ID for the batch buffer */
+	__u32 exec_queue_id;
+
+	/** @num_syncs: Amount of struct drm_xe_sync in array. */
+	__u32 num_syncs;
+
+	/** @syncs: Pointer to struct drm_xe_sync array. */
+	__u64 syncs;
+
+	/**
+	 * @address: address of batch buffer if num_batch_buffer == 1 or an
+	 * array of batch buffer addresses
+	 */
+	__u64 address;
+
+	/**
+	 * @num_batch_buffer: number of batch buffer in this exec, must match
+	 * the width of the engine
+	 */
+	__u16 num_batch_buffer;
+
+	/** @pad: MBZ */
+	__u16 pad[3];
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_wait_user_fence - Input of &DRM_IOCTL_XE_WAIT_USER_FENCE
+ *
+ * Wait on user fence, XE will wake-up on every HW engine interrupt in the
+ * instances list and check if user fence is complete::
+ *
+ *	(*addr & MASK) OP (VALUE & MASK)
+ *
+ * Returns to user on user fence completion or timeout.
+ *
+ * The @op can be:
+ *  - %DRM_XE_UFENCE_WAIT_OP_EQ
+ *  - %DRM_XE_UFENCE_WAIT_OP_NEQ
+ *  - %DRM_XE_UFENCE_WAIT_OP_GT
+ *  - %DRM_XE_UFENCE_WAIT_OP_GTE
+ *  - %DRM_XE_UFENCE_WAIT_OP_LT
+ *  - %DRM_XE_UFENCE_WAIT_OP_LTE
+ *
+ * and the @flags can be:
+ *  - %DRM_XE_UFENCE_WAIT_FLAG_ABSTIME
+ *  - %DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP
+ *
+ * The @mask values can be for example:
+ *  - 0xffu for u8
+ *  - 0xffffu for u16
+ *  - 0xffffffffu for u32
+ *  - 0xffffffffffffffffu for u64
+ */
+struct drm_xe_wait_user_fence {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/**
+	 * @addr: user pointer address to wait on, must qword aligned
+	 */
+	__u64 addr;
+
+#define DRM_XE_UFENCE_WAIT_OP_EQ	0x0
+#define DRM_XE_UFENCE_WAIT_OP_NEQ	0x1
+#define DRM_XE_UFENCE_WAIT_OP_GT	0x2
+#define DRM_XE_UFENCE_WAIT_OP_GTE	0x3
+#define DRM_XE_UFENCE_WAIT_OP_LT	0x4
+#define DRM_XE_UFENCE_WAIT_OP_LTE	0x5
+	/** @op: wait operation (type of comparison) */
+	__u16 op;
+
+#define DRM_XE_UFENCE_WAIT_FLAG_ABSTIME	(1 << 0)
+	/** @flags: wait flags */
+	__u16 flags;
+
+	/** @pad: MBZ */
+	__u32 pad;
+
+	/** @value: compare value */
+	__u64 value;
+
+	/** @mask: comparison mask */
+	__u64 mask;
+
+	/**
+	 * @timeout: how long to wait before bailing, value in nanoseconds.
+	 * Without DRM_XE_UFENCE_WAIT_FLAG_ABSTIME flag set (relative timeout)
+	 * it contains timeout expressed in nanoseconds to wait (fence will
+	 * expire at now() + timeout).
+	 * When DRM_XE_UFENCE_WAIT_FLAG_ABSTIME flat is set (absolute timeout) wait
+	 * will end at timeout (uses system MONOTONIC_CLOCK).
+	 * Passing negative timeout leads to neverending wait.
+	 *
+	 * On relative timeout this value is updated with timeout left
+	 * (for restarting the call in case of signal delivery).
+	 * On absolute timeout this value stays intact (restarted call still
+	 * expire at the same point of time).
+	 */
+	__s64 timeout;
+
+	/** @exec_queue_id: exec_queue_id returned from xe_exec_queue_create_ioctl */
+	__u32 exec_queue_id;
+
+	/** @pad2: MBZ */
+	__u32 pad2;
+
+	/** @reserved: Reserved */
+	__u64 reserved[2];
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _UAPI_XE_DRM_H_ */
diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
index 5c6c4269f7ef..2ec6f35cda32 100644
--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -27,7 +27,7 @@
 	union { \
 		struct { MEMBERS } ATTRS; \
 		struct TAG { MEMBERS } ATTRS NAME; \
-	}
+	} ATTRS
 
 #ifdef __cplusplus
 /* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */
diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h
index 4a195b68f28f..b383c2fe0cf3 100644
--- a/include/uapi/linux/v4l2-subdev.h
+++ b/include/uapi/linux/v4l2-subdev.h
@@ -239,7 +239,7 @@ struct v4l2_subdev_routing {
  * set (which is the default), the 'stream' fields will be forced to 0 by the
  * kernel.
  */
- #define V4L2_SUBDEV_CLIENT_CAP_STREAMS		(1U << 0)
+ #define V4L2_SUBDEV_CLIENT_CAP_STREAMS		(1ULL << 0)
 
 /**
  * struct v4l2_subdev_client_capability - Capabilities of the client accessing
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index 3c19cccb1aec..8a8b07dfc444 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -273,7 +273,7 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
 	};
 	ktime_t timeout = KTIME_MAX;
 	struct io_uring_sync_cancel_reg sc;
-	struct fd f = { };
+	struct file *file = NULL;
 	DEFINE_WAIT(wait);
 	int ret, i;
 
@@ -295,10 +295,10 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
 	/* we can grab a normal file descriptor upfront */
 	if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
 	   !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
-		f = fdget(sc.fd);
-		if (!f.file)
+		file = fget(sc.fd);
+		if (!file)
 			return -EBADF;
-		cd.file = f.file;
+		cd.file = file;
 	}
 
 	ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);
@@ -348,6 +348,7 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
 	if (ret == -ENOENT || ret > 0)
 		ret = 0;
 out:
-	fdput(f);
+	if (file)
+		fput(file);
 	return ret;
 }
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index ed254076c723..9626a363f121 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -271,6 +271,7 @@ static __cold void io_fallback_req_func(struct work_struct *work)
 	struct io_kiocb *req, *tmp;
 	struct io_tw_state ts = { .locked = true, };
 
+	percpu_ref_get(&ctx->refs);
 	mutex_lock(&ctx->uring_lock);
 	llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
 		req->io_task_work.func(req, &ts);
@@ -278,6 +279,7 @@ static __cold void io_fallback_req_func(struct work_struct *work)
 		return;
 	io_submit_flush_completions(ctx);
 	mutex_unlock(&ctx->uring_lock);
+	percpu_ref_put(&ctx->refs);
 }
 
 static int io_alloc_hash_table(struct io_hash_table *table, unsigned bits)
@@ -325,6 +327,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->sqd_list);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	INIT_LIST_HEAD(&ctx->io_buffers_cache);
+	INIT_HLIST_HEAD(&ctx->io_buf_list);
 	io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
 			    sizeof(struct io_rsrc_node));
 	io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX,
@@ -2666,7 +2669,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
 }
 
-static void io_mem_free(void *ptr)
+void io_mem_free(void *ptr)
 {
 	if (!ptr)
 		return;
@@ -2697,6 +2700,7 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
 {
 	struct page **page_array;
 	unsigned int nr_pages;
+	void *page_addr;
 	int ret, i;
 
 	*npages = 0;
@@ -2718,27 +2722,29 @@ err:
 		io_pages_free(&page_array, ret > 0 ? ret : 0);
 		return ret < 0 ? ERR_PTR(ret) : ERR_PTR(-EFAULT);
 	}
-	/*
-	 * Should be a single page. If the ring is small enough that we can
-	 * use a normal page, that is fine. If we need multiple pages, then
-	 * userspace should use a huge page. That's the only way to guarantee
-	 * that we get contigious memory, outside of just being lucky or
-	 * (currently) having low memory fragmentation.
-	 */
-	if (page_array[0] != page_array[ret - 1])
-		goto err;
 
-	/*
-	 * Can't support mapping user allocated ring memory on 32-bit archs
-	 * where it could potentially reside in highmem. Just fail those with
-	 * -EINVAL, just like we did on kernels that didn't support this
-	 * feature.
-	 */
+	page_addr = page_address(page_array[0]);
 	for (i = 0; i < nr_pages; i++) {
-		if (PageHighMem(page_array[i])) {
-			ret = -EINVAL;
+		ret = -EINVAL;
+
+		/*
+		 * Can't support mapping user allocated ring memory on 32-bit
+		 * archs where it could potentially reside in highmem. Just
+		 * fail those with -EINVAL, just like we did on kernels that
+		 * didn't support this feature.
+		 */
+		if (PageHighMem(page_array[i]))
 			goto err;
-		}
+
+		/*
+		 * No support for discontig pages for now, should either be a
+		 * single normal page, or a huge page. Later on we can add
+		 * support for remapping discontig pages, for now we will
+		 * just fail them with EINVAL.
+		 */
+		if (page_address(page_array[i]) != page_addr)
+			goto err;
+		page_addr += PAGE_SIZE;
 	}
 
 	*pages = page_array;
@@ -2775,7 +2781,7 @@ static void io_rings_free(struct io_ring_ctx *ctx)
 	}
 }
 
-static void *io_mem_alloc(size_t size)
+void *io_mem_alloc(size_t size)
 {
 	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
 	void *ret;
@@ -2947,6 +2953,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		ctx->mm_account = NULL;
 	}
 	io_rings_free(ctx);
+	io_kbuf_mmap_list_free(ctx);
 
 	percpu_ref_exit(&ctx->refs);
 	free_uid(ctx->user);
@@ -3141,12 +3148,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
 	init_completion(&exit.completion);
 	init_task_work(&exit.task_work, io_tctx_exit_cb);
 	exit.ctx = ctx;
-	/*
-	 * Some may use context even when all refs and requests have been put,
-	 * and they are free to do so while still holding uring_lock or
-	 * completion_lock, see io_req_task_submit(). Apart from other work,
-	 * this lock/unlock section also waits them to finish.
-	 */
+
 	mutex_lock(&ctx->uring_lock);
 	while (!list_empty(&ctx->tctx_list)) {
 		WARN_ON_ONCE(time_after(jiffies, timeout));
@@ -3475,25 +3477,27 @@ static void *io_uring_validate_mmap_request(struct file *file,
 	struct page *page;
 	void *ptr;
 
-	/* Don't allow mmap if the ring was setup without it */
-	if (ctx->flags & IORING_SETUP_NO_MMAP)
-		return ERR_PTR(-EINVAL);
-
 	switch (offset & IORING_OFF_MMAP_MASK) {
 	case IORING_OFF_SQ_RING:
 	case IORING_OFF_CQ_RING:
+		/* Don't allow mmap if the ring was setup without it */
+		if (ctx->flags & IORING_SETUP_NO_MMAP)
+			return ERR_PTR(-EINVAL);
 		ptr = ctx->rings;
 		break;
 	case IORING_OFF_SQES:
+		/* Don't allow mmap if the ring was setup without it */
+		if (ctx->flags & IORING_SETUP_NO_MMAP)
+			return ERR_PTR(-EINVAL);
 		ptr = ctx->sq_sqes;
 		break;
 	case IORING_OFF_PBUF_RING: {
 		unsigned int bgid;
 
 		bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
-		mutex_lock(&ctx->uring_lock);
+		rcu_read_lock();
 		ptr = io_pbuf_get_address(ctx, bgid);
-		mutex_unlock(&ctx->uring_lock);
+		rcu_read_unlock();
 		if (!ptr)
 			return ERR_PTR(-EINVAL);
 		break;
@@ -3645,7 +3649,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		size_t, argsz)
 {
 	struct io_ring_ctx *ctx;
-	struct fd f;
+	struct file *file;
 	long ret;
 
 	if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
@@ -3663,20 +3667,19 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
 			return -EINVAL;
 		fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
-		f.file = tctx->registered_rings[fd];
-		f.flags = 0;
-		if (unlikely(!f.file))
+		file = tctx->registered_rings[fd];
+		if (unlikely(!file))
 			return -EBADF;
 	} else {
-		f = fdget(fd);
-		if (unlikely(!f.file))
+		file = fget(fd);
+		if (unlikely(!file))
 			return -EBADF;
 		ret = -EOPNOTSUPP;
-		if (unlikely(!io_is_uring_fops(f.file)))
+		if (unlikely(!io_is_uring_fops(file)))
 			goto out;
 	}
 
-	ctx = f.file->private_data;
+	ctx = file->private_data;
 	ret = -EBADFD;
 	if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED))
 		goto out;
@@ -3770,7 +3773,8 @@ iopoll_locked:
 		}
 	}
 out:
-	fdput(f);
+	if (!(flags & IORING_ENTER_REGISTERED_RING))
+		fput(file);
 	return ret;
 }
 
@@ -4611,7 +4615,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
 {
 	struct io_ring_ctx *ctx;
 	long ret = -EBADF;
-	struct fd f;
+	struct file *file;
 	bool use_registered_ring;
 
 	use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
@@ -4630,27 +4634,27 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
 		if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
 			return -EINVAL;
 		fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
-		f.file = tctx->registered_rings[fd];
-		f.flags = 0;
-		if (unlikely(!f.file))
+		file = tctx->registered_rings[fd];
+		if (unlikely(!file))
 			return -EBADF;
 	} else {
-		f = fdget(fd);
-		if (unlikely(!f.file))
+		file = fget(fd);
+		if (unlikely(!file))
 			return -EBADF;
 		ret = -EOPNOTSUPP;
-		if (!io_is_uring_fops(f.file))
+		if (!io_is_uring_fops(file))
 			goto out_fput;
 	}
 
-	ctx = f.file->private_data;
+	ctx = file->private_data;
 
 	mutex_lock(&ctx->uring_lock);
 	ret = __io_uring_register(ctx, opcode, arg, nr_args);
 	mutex_unlock(&ctx->uring_lock);
 	trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret);
 out_fput:
-	fdput(f);
+	if (!use_registered_ring)
+		fput(file);
 	return ret;
 }
 
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index dc6d779b452b..ed84f2737b3a 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -86,6 +86,9 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
 bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
 			bool cancel_all);
 
+void *io_mem_alloc(size_t size);
+void io_mem_free(void *ptr);
+
 #if defined(CONFIG_PROVE_LOCKING)
 static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
 {
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index a1e4239c7d75..72b6af1d2ed3 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -33,19 +33,42 @@ struct io_provide_buf {
 	__u16				bid;
 };
 
+struct io_buf_free {
+	struct hlist_node		list;
+	void				*mem;
+	size_t				size;
+	int				inuse;
+};
+
+static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
+						   struct io_buffer_list *bl,
+						   unsigned int bgid)
+{
+	if (bl && bgid < BGID_ARRAY)
+		return &bl[bgid];
+
+	return xa_load(&ctx->io_bl_xa, bgid);
+}
+
 static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
 							unsigned int bgid)
 {
-	if (ctx->io_bl && bgid < BGID_ARRAY)
-		return &ctx->io_bl[bgid];
+	lockdep_assert_held(&ctx->uring_lock);
 
-	return xa_load(&ctx->io_bl_xa, bgid);
+	return __io_buffer_get_list(ctx, ctx->io_bl, bgid);
 }
 
 static int io_buffer_add_list(struct io_ring_ctx *ctx,
 			      struct io_buffer_list *bl, unsigned int bgid)
 {
+	/*
+	 * Store buffer group ID and finally mark the list as visible.
+	 * The normal lookup doesn't care about the visibility as we're
+	 * always under the ->uring_lock, but the RCU lookup from mmap does.
+	 */
 	bl->bgid = bgid;
+	smp_store_release(&bl->is_ready, 1);
+
 	if (bgid < BGID_ARRAY)
 		return 0;
 
@@ -196,21 +219,40 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
 
 static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
 {
+	struct io_buffer_list *bl;
 	int i;
 
-	ctx->io_bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list),
-				GFP_KERNEL);
-	if (!ctx->io_bl)
+	bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL);
+	if (!bl)
 		return -ENOMEM;
 
 	for (i = 0; i < BGID_ARRAY; i++) {
-		INIT_LIST_HEAD(&ctx->io_bl[i].buf_list);
-		ctx->io_bl[i].bgid = i;
+		INIT_LIST_HEAD(&bl[i].buf_list);
+		bl[i].bgid = i;
 	}
 
+	smp_store_release(&ctx->io_bl, bl);
 	return 0;
 }
 
+/*
+ * Mark the given mapped range as free for reuse
+ */
+static void io_kbuf_mark_free(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+{
+	struct io_buf_free *ibf;
+
+	hlist_for_each_entry(ibf, &ctx->io_buf_list, list) {
+		if (bl->buf_ring == ibf->mem) {
+			ibf->inuse = 0;
+			return;
+		}
+	}
+
+	/* can't happen... */
+	WARN_ON_ONCE(1);
+}
+
 static int __io_remove_buffers(struct io_ring_ctx *ctx,
 			       struct io_buffer_list *bl, unsigned nbufs)
 {
@@ -223,7 +265,11 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
 	if (bl->is_mapped) {
 		i = bl->buf_ring->tail - bl->head;
 		if (bl->is_mmap) {
-			folio_put(virt_to_folio(bl->buf_ring));
+			/*
+			 * io_kbuf_list_free() will free the page(s) at
+			 * ->release() time.
+			 */
+			io_kbuf_mark_free(ctx, bl);
 			bl->buf_ring = NULL;
 			bl->is_mmap = 0;
 		} else if (bl->buf_nr_pages) {
@@ -274,9 +320,17 @@ void io_destroy_buffers(struct io_ring_ctx *ctx)
 	xa_for_each(&ctx->io_bl_xa, index, bl) {
 		xa_erase(&ctx->io_bl_xa, bl->bgid);
 		__io_remove_buffers(ctx, bl, -1U);
-		kfree(bl);
+		kfree_rcu(bl, rcu);
 	}
 
+	/*
+	 * Move deferred locked entries to cache before pruning
+	 */
+	spin_lock(&ctx->completion_lock);
+	if (!list_empty(&ctx->io_buffers_comp))
+		list_splice_init(&ctx->io_buffers_comp, &ctx->io_buffers_cache);
+	spin_unlock(&ctx->completion_lock);
+
 	list_for_each_safe(item, tmp, &ctx->io_buffers_cache) {
 		buf = list_entry(item, struct io_buffer, list);
 		kmem_cache_free(io_buf_cachep, buf);
@@ -460,7 +514,16 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 		INIT_LIST_HEAD(&bl->buf_list);
 		ret = io_buffer_add_list(ctx, bl, p->bgid);
 		if (ret) {
-			kfree(bl);
+			/*
+			 * Doesn't need rcu free as it was never visible, but
+			 * let's keep it consistent throughout. Also can't
+			 * be a lower indexed array group, as adding one
+			 * where lookup failed cannot happen.
+			 */
+			if (p->bgid >= BGID_ARRAY)
+				kfree_rcu(bl, rcu);
+			else
+				WARN_ON_ONCE(1);
 			goto err;
 		}
 	}
@@ -531,19 +594,63 @@ error_unpin:
 	return -EINVAL;
 }
 
-static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg,
+/*
+ * See if we have a suitable region that we can reuse, rather than allocate
+ * both a new io_buf_free and mem region again. We leave it on the list as
+ * even a reused entry will need freeing at ring release.
+ */
+static struct io_buf_free *io_lookup_buf_free_entry(struct io_ring_ctx *ctx,
+						    size_t ring_size)
+{
+	struct io_buf_free *ibf, *best = NULL;
+	size_t best_dist;
+
+	hlist_for_each_entry(ibf, &ctx->io_buf_list, list) {
+		size_t dist;
+
+		if (ibf->inuse || ibf->size < ring_size)
+			continue;
+		dist = ibf->size - ring_size;
+		if (!best || dist < best_dist) {
+			best = ibf;
+			if (!dist)
+				break;
+			best_dist = dist;
+		}
+	}
+
+	return best;
+}
+
+static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx,
+			      struct io_uring_buf_reg *reg,
 			      struct io_buffer_list *bl)
 {
-	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
+	struct io_buf_free *ibf;
 	size_t ring_size;
 	void *ptr;
 
 	ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
-	ptr = (void *) __get_free_pages(gfp, get_order(ring_size));
-	if (!ptr)
-		return -ENOMEM;
 
-	bl->buf_ring = ptr;
+	/* Reuse existing entry, if we can */
+	ibf = io_lookup_buf_free_entry(ctx, ring_size);
+	if (!ibf) {
+		ptr = io_mem_alloc(ring_size);
+		if (IS_ERR(ptr))
+			return PTR_ERR(ptr);
+
+		/* Allocate and store deferred free entry */
+		ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT);
+		if (!ibf) {
+			io_mem_free(ptr);
+			return -ENOMEM;
+		}
+		ibf->mem = ptr;
+		ibf->size = ring_size;
+		hlist_add_head(&ibf->list, &ctx->io_buf_list);
+	}
+	ibf->inuse = 1;
+	bl->buf_ring = ibf->mem;
 	bl->is_mapped = 1;
 	bl->is_mmap = 1;
 	return 0;
@@ -555,6 +662,8 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	struct io_buffer_list *bl, *free_bl = NULL;
 	int ret;
 
+	lockdep_assert_held(&ctx->uring_lock);
+
 	if (copy_from_user(&reg, arg, sizeof(reg)))
 		return -EFAULT;
 
@@ -599,7 +708,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	if (!(reg.flags & IOU_PBUF_RING_MMAP))
 		ret = io_pin_pbuf_ring(&reg, bl);
 	else
-		ret = io_alloc_pbuf_ring(&reg, bl);
+		ret = io_alloc_pbuf_ring(ctx, &reg, bl);
 
 	if (!ret) {
 		bl->nr_entries = reg.ring_entries;
@@ -609,7 +718,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 		return 0;
 	}
 
-	kfree(free_bl);
+	kfree_rcu(free_bl, rcu);
 	return ret;
 }
 
@@ -618,6 +727,8 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	struct io_uring_buf_reg reg;
 	struct io_buffer_list *bl;
 
+	lockdep_assert_held(&ctx->uring_lock);
+
 	if (copy_from_user(&reg, arg, sizeof(reg)))
 		return -EFAULT;
 	if (reg.resv[0] || reg.resv[1] || reg.resv[2])
@@ -634,7 +745,7 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	__io_remove_buffers(ctx, bl, -1U);
 	if (bl->bgid >= BGID_ARRAY) {
 		xa_erase(&ctx->io_bl_xa, bl->bgid);
-		kfree(bl);
+		kfree_rcu(bl, rcu);
 	}
 	return 0;
 }
@@ -643,9 +754,33 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
 {
 	struct io_buffer_list *bl;
 
-	bl = io_buffer_get_list(ctx, bgid);
+	bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid);
+
 	if (!bl || !bl->is_mmap)
 		return NULL;
+	/*
+	 * Ensure the list is fully setup. Only strictly needed for RCU lookup
+	 * via mmap, and in that case only for the array indexed groups. For
+	 * the xarray lookups, it's either visible and ready, or not at all.
+	 */
+	if (!smp_load_acquire(&bl->is_ready))
+		return NULL;
 
 	return bl->buf_ring;
 }
+
+/*
+ * Called at or after ->release(), free the mmap'ed buffers that we used
+ * for memory mapped provided buffer rings.
+ */
+void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx)
+{
+	struct io_buf_free *ibf;
+	struct hlist_node *tmp;
+
+	hlist_for_each_entry_safe(ibf, tmp, &ctx->io_buf_list, list) {
+		hlist_del(&ibf->list);
+		io_mem_free(ibf->mem);
+		kfree(ibf);
+	}
+}
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index f2d615236b2c..9be5960817ea 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -15,6 +15,7 @@ struct io_buffer_list {
 			struct page **buf_pages;
 			struct io_uring_buf_ring *buf_ring;
 		};
+		struct rcu_head rcu;
 	};
 	__u16 bgid;
 
@@ -28,6 +29,8 @@ struct io_buffer_list {
 	__u8 is_mapped;
 	/* ring mapped provided buffers, but mmap'ed by application */
 	__u8 is_mmap;
+	/* bl is visible from an RCU point of view for lookup */
+	__u8 is_ready;
 };
 
 struct io_buffer {
@@ -51,6 +54,8 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags);
 int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
 int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
 
+void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx);
+
 unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
 
 bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 8625181fb87a..08ac0d8e07ef 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -77,17 +77,10 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 
 int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file);
 
-#if defined(CONFIG_UNIX)
-static inline bool io_file_need_scm(struct file *filp)
-{
-	return !!unix_get_socket(filp);
-}
-#else
 static inline bool io_file_need_scm(struct file *filp)
 {
 	return false;
 }
-#endif
 
 static inline int io_scm_file_account(struct io_ring_ctx *ctx,
 				      struct file *file)
diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec
index 7aff28ded2f4..1cc3b1c595d7 100644
--- a/kernel/Kconfig.kexec
+++ b/kernel/Kconfig.kexec
@@ -97,7 +97,6 @@ config CRASH_DUMP
 	depends on ARCH_SUPPORTS_KEXEC
 	select CRASH_CORE
 	select KEXEC_CORE
-	select KEXEC
 	help
 	  Generate crash dump after being started by kexec.
 	  This should be normally only set in special crash dump kernels
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2058e89b5ddd..c85ff9162a5c 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -1012,11 +1012,16 @@ static void prog_array_map_poke_untrack(struct bpf_map *map,
 	mutex_unlock(&aux->poke_mutex);
 }
 
+void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+				      struct bpf_prog *new, struct bpf_prog *old)
+{
+	WARN_ON_ONCE(1);
+}
+
 static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
 				    struct bpf_prog *old,
 				    struct bpf_prog *new)
 {
-	u8 *old_addr, *new_addr, *old_bypass_addr;
 	struct prog_poke_elem *elem;
 	struct bpf_array_aux *aux;
 
@@ -1025,7 +1030,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
 
 	list_for_each_entry(elem, &aux->poke_progs, list) {
 		struct bpf_jit_poke_descriptor *poke;
-		int i, ret;
+		int i;
 
 		for (i = 0; i < elem->aux->size_poke_tab; i++) {
 			poke = &elem->aux->poke_tab[i];
@@ -1044,21 +1049,10 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
 			 *    activated, so tail call updates can arrive from here
 			 *    while JIT is still finishing its final fixup for
 			 *    non-activated poke entries.
-			 * 3) On program teardown, the program's kallsym entry gets
-			 *    removed out of RCU callback, but we can only untrack
-			 *    from sleepable context, therefore bpf_arch_text_poke()
-			 *    might not see that this is in BPF text section and
-			 *    bails out with -EINVAL. As these are unreachable since
-			 *    RCU grace period already passed, we simply skip them.
-			 * 4) Also programs reaching refcount of zero while patching
+			 * 3) Also programs reaching refcount of zero while patching
 			 *    is in progress is okay since we're protected under
 			 *    poke_mutex and untrack the programs before the JIT
-			 *    buffer is freed. When we're still in the middle of
-			 *    patching and suddenly kallsyms entry of the program
-			 *    gets evicted, we just skip the rest which is fine due
-			 *    to point 3).
-			 * 5) Any other error happening below from bpf_arch_text_poke()
-			 *    is a unexpected bug.
+			 *    buffer is freed.
 			 */
 			if (!READ_ONCE(poke->tailcall_target_stable))
 				continue;
@@ -1068,39 +1062,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
 			    poke->tail_call.key != key)
 				continue;
 
-			old_bypass_addr = old ? NULL : poke->bypass_addr;
-			old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
-			new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
-
-			if (new) {
-				ret = bpf_arch_text_poke(poke->tailcall_target,
-							 BPF_MOD_JUMP,
-							 old_addr, new_addr);
-				BUG_ON(ret < 0 && ret != -EINVAL);
-				if (!old) {
-					ret = bpf_arch_text_poke(poke->tailcall_bypass,
-								 BPF_MOD_JUMP,
-								 poke->bypass_addr,
-								 NULL);
-					BUG_ON(ret < 0 && ret != -EINVAL);
-				}
-			} else {
-				ret = bpf_arch_text_poke(poke->tailcall_bypass,
-							 BPF_MOD_JUMP,
-							 old_bypass_addr,
-							 poke->bypass_addr);
-				BUG_ON(ret < 0 && ret != -EINVAL);
-				/* let other CPUs finish the execution of program
-				 * so that it will not possible to expose them
-				 * to invalid nop, stack unwind, nop state
-				 */
-				if (!ret)
-					synchronize_rcu();
-				ret = bpf_arch_text_poke(poke->tailcall_target,
-							 BPF_MOD_JUMP,
-							 old_addr, NULL);
-				BUG_ON(ret < 0 && ret != -EINVAL);
-			}
+			bpf_arch_poke_desc_update(poke, new, old);
 		}
 	}
 }
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index cd3afe57ece3..fe254ae035fe 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -371,14 +371,18 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old,
 static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
 				s32 end_new, s32 curr, const bool probe_pass)
 {
-	const s32 off_min = S16_MIN, off_max = S16_MAX;
+	s64 off_min, off_max, off;
 	s32 delta = end_new - end_old;
-	s32 off;
 
-	if (insn->code == (BPF_JMP32 | BPF_JA))
+	if (insn->code == (BPF_JMP32 | BPF_JA)) {
 		off = insn->imm;
-	else
+		off_min = S32_MIN;
+		off_max = S32_MAX;
+	} else {
 		off = insn->off;
+		off_min = S16_MIN;
+		off_max = S16_MAX;
+	}
 
 	if (curr < pos && curr + off + 1 >= end_old)
 		off += delta;
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 63b909d277d4..6a51cfe4c2d6 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -978,6 +978,8 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
 		memcg = get_memcg(c);
 		old_memcg = set_active_memcg(memcg);
 		ret = __alloc(c, NUMA_NO_NODE, GFP_KERNEL | __GFP_NOWARN | __GFP_ACCOUNT);
+		if (ret)
+			*(struct bpf_mem_cache **)ret = c;
 		set_active_memcg(old_memcg);
 		mem_cgroup_put(memcg);
 	}
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
index 122dacb3a443..66d1708042a7 100644
--- a/kernel/cgroup/legacy_freezer.c
+++ b/kernel/cgroup/legacy_freezer.c
@@ -66,9 +66,15 @@ static struct freezer *parent_freezer(struct freezer *freezer)
 bool cgroup_freezing(struct task_struct *task)
 {
 	bool ret;
+	unsigned int state;
 
 	rcu_read_lock();
-	ret = task_freezer(task)->state & CGROUP_FREEZING;
+	/* Check if the cgroup is still FREEZING, but not FROZEN. The extra
+	 * !FROZEN check is required, because the FREEZING bit is not cleared
+	 * when the state FROZEN is reached.
+	 */
+	state = task_freezer(task)->state;
+	ret = (state & CGROUP_FREEZING) && !(state & CGROUP_FROZEN);
 	rcu_read_unlock();
 
 	return ret;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b704d83a28b2..c9d123e13b57 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1814,31 +1814,34 @@ static inline void perf_event__state_init(struct perf_event *event)
 					      PERF_EVENT_STATE_INACTIVE;
 }
 
-static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
+static int __perf_event_read_size(u64 read_format, int nr_siblings)
 {
 	int entry = sizeof(u64); /* value */
 	int size = 0;
 	int nr = 1;
 
-	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
 		size += sizeof(u64);
 
-	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		size += sizeof(u64);
 
-	if (event->attr.read_format & PERF_FORMAT_ID)
+	if (read_format & PERF_FORMAT_ID)
 		entry += sizeof(u64);
 
-	if (event->attr.read_format & PERF_FORMAT_LOST)
+	if (read_format & PERF_FORMAT_LOST)
 		entry += sizeof(u64);
 
-	if (event->attr.read_format & PERF_FORMAT_GROUP) {
+	if (read_format & PERF_FORMAT_GROUP) {
 		nr += nr_siblings;
 		size += sizeof(u64);
 	}
 
-	size += entry * nr;
-	event->read_size = size;
+	/*
+	 * Since perf_event_validate_size() limits this to 16k and inhibits
+	 * adding more siblings, this will never overflow.
+	 */
+	return size + nr * entry;
 }
 
 static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
@@ -1888,8 +1891,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
  */
 static void perf_event__header_size(struct perf_event *event)
 {
-	__perf_event_read_size(event,
-			       event->group_leader->nr_siblings);
+	event->read_size =
+		__perf_event_read_size(event->attr.read_format,
+				       event->group_leader->nr_siblings);
 	__perf_event_header_size(event, event->attr.sample_type);
 }
 
@@ -1920,24 +1924,35 @@ static void perf_event__id_header_size(struct perf_event *event)
 	event->id_header_size = size;
 }
 
+/*
+ * Check that adding an event to the group does not result in anybody
+ * overflowing the 64k event limit imposed by the output buffer.
+ *
+ * Specifically, check that the read_size for the event does not exceed 16k,
+ * read_size being the one term that grows with groups size. Since read_size
+ * depends on per-event read_format, also (re)check the existing events.
+ *
+ * This leaves 48k for the constant size fields and things like callchains,
+ * branch stacks and register sets.
+ */
 static bool perf_event_validate_size(struct perf_event *event)
 {
-	/*
-	 * The values computed here will be over-written when we actually
-	 * attach the event.
-	 */
-	__perf_event_read_size(event, event->group_leader->nr_siblings + 1);
-	__perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
-	perf_event__id_header_size(event);
+	struct perf_event *sibling, *group_leader = event->group_leader;
 
-	/*
-	 * Sum the lot; should not exceed the 64k limit we have on records.
-	 * Conservative limit to allow for callchains and other variable fields.
-	 */
-	if (event->read_size + event->header_size +
-	    event->id_header_size + sizeof(struct perf_event_header) >= 16*1024)
+	if (__perf_event_read_size(event->attr.read_format,
+				   group_leader->nr_siblings + 1) > 16*1024)
 		return false;
 
+	if (__perf_event_read_size(group_leader->attr.read_format,
+				   group_leader->nr_siblings + 1) > 16*1024)
+		return false;
+
+	for_each_sibling_event(sibling, group_leader) {
+		if (__perf_event_read_size(sibling->attr.read_format,
+					   group_leader->nr_siblings + 1) > 16*1024)
+			return false;
+	}
+
 	return true;
 }
 
diff --git a/kernel/freezer.c b/kernel/freezer.c
index c450fa8b8b5e..759006a9a910 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -201,7 +201,7 @@ void __thaw_task(struct task_struct *p)
 	if (WARN_ON_ONCE(freezing(p)))
 		goto unlock;
 
-	if (task_call_func(p, __restore_freezer_state, NULL))
+	if (!frozen(p) || task_call_func(p, __restore_freezer_state, NULL))
 		goto unlock;
 
 	wake_up_state(p, TASK_FROZEN);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 075a632e6c7c..d5a0ee40bf66 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2252,7 +2252,7 @@ int register_kretprobe(struct kretprobe *rp)
 		rp->rph = NULL;
 		return -ENOMEM;
 	}
-	rp->rph->rp = rp;
+	rcu_assign_pointer(rp->rph->rp, rp);
 	rp->nmissed = 0;
 	/* Establish function entry probe point */
 	ret = register_kprobe(&rp->kp);
@@ -2300,7 +2300,7 @@ void unregister_kretprobes(struct kretprobe **rps, int num)
 #ifdef CONFIG_KRETPROBE_ON_RETHOOK
 		rethook_free(rps[i]->rh);
 #else
-		rps[i]->rph->rp = NULL;
+		rcu_assign_pointer(rps[i]->rph->rp, NULL);
 #endif
 	}
 	mutex_unlock(&kprobe_mutex);
diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c
index 6fd7d4ecbbc6..fa03094e9e69 100644
--- a/kernel/trace/rethook.c
+++ b/kernel/trace/rethook.c
@@ -48,7 +48,7 @@ static void rethook_free_rcu(struct rcu_head *head)
  */
 void rethook_stop(struct rethook *rh)
 {
-	WRITE_ONCE(rh->handler, NULL);
+	rcu_assign_pointer(rh->handler, NULL);
 }
 
 /**
@@ -63,7 +63,7 @@ void rethook_stop(struct rethook *rh)
  */
 void rethook_free(struct rethook *rh)
 {
-	WRITE_ONCE(rh->handler, NULL);
+	rethook_stop(rh);
 
 	call_rcu(&rh->rcu, rethook_free_rcu);
 }
@@ -82,6 +82,12 @@ static int rethook_fini_pool(struct objpool_head *head, void *context)
 	return 0;
 }
 
+static inline rethook_handler_t rethook_get_handler(struct rethook *rh)
+{
+	return (rethook_handler_t)rcu_dereference_check(rh->handler,
+							rcu_read_lock_any_held());
+}
+
 /**
  * rethook_alloc() - Allocate struct rethook.
  * @data: a data to pass the @handler when hooking the return.
@@ -107,7 +113,7 @@ struct rethook *rethook_alloc(void *data, rethook_handler_t handler,
 		return ERR_PTR(-ENOMEM);
 
 	rh->data = data;
-	rh->handler = handler;
+	rcu_assign_pointer(rh->handler, handler);
 
 	/* initialize the objpool for rethook nodes */
 	if (objpool_init(&rh->pool, num, size, GFP_KERNEL, rh,
@@ -135,9 +141,10 @@ static void free_rethook_node_rcu(struct rcu_head *head)
  */
 void rethook_recycle(struct rethook_node *node)
 {
-	lockdep_assert_preemption_disabled();
+	rethook_handler_t handler;
 
-	if (likely(READ_ONCE(node->rethook->handler)))
+	handler = rethook_get_handler(node->rethook);
+	if (likely(handler))
 		objpool_push(node, &node->rethook->pool);
 	else
 		call_rcu(&node->rcu, free_rethook_node_rcu);
@@ -153,9 +160,7 @@ NOKPROBE_SYMBOL(rethook_recycle);
  */
 struct rethook_node *rethook_try_get(struct rethook *rh)
 {
-	rethook_handler_t handler = READ_ONCE(rh->handler);
-
-	lockdep_assert_preemption_disabled();
+	rethook_handler_t handler = rethook_get_handler(rh);
 
 	/* Check whether @rh is going to be freed. */
 	if (unlikely(!handler))
@@ -300,7 +305,7 @@ unsigned long rethook_trampoline_handler(struct pt_regs *regs,
 		rhn = container_of(first, struct rethook_node, llist);
 		if (WARN_ON_ONCE(rhn->frame != frame))
 			break;
-		handler = READ_ONCE(rhn->rethook->handler);
+		handler = rethook_get_handler(rhn->rethook);
 		if (handler)
 			handler(rhn, rhn->rethook->data,
 				correct_ret_addr, regs);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 43cc47d7faaf..8d2a4f00eca9 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -644,8 +644,8 @@ static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt)
 
 	*cnt = rb_time_cnt(top);
 
-	/* If top and bottom counts don't match, this interrupted a write */
-	if (*cnt != rb_time_cnt(bottom))
+	/* If top and msb counts don't match, this interrupted a write */
+	if (*cnt != rb_time_cnt(msb))
 		return false;
 
 	/* The shift to msb will lose its cnt bits */
@@ -3030,23 +3030,20 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
 			local_read(&bpage->write) & ~RB_WRITE_MASK;
 		unsigned long event_length = rb_event_length(event);
 
+		/*
+		 * For the before_stamp to be different than the write_stamp
+		 * to make sure that the next event adds an absolute
+		 * value and does not rely on the saved write stamp, which
+		 * is now going to be bogus.
+		 */
+		rb_time_set(&cpu_buffer->before_stamp, 0);
+
 		/* Something came in, can't discard */
 		if (!rb_time_cmpxchg(&cpu_buffer->write_stamp,
 				       write_stamp, write_stamp - delta))
 			return false;
 
 		/*
-		 * It's possible that the event time delta is zero
-		 * (has the same time stamp as the previous event)
-		 * in which case write_stamp and before_stamp could
-		 * be the same. In such a case, force before_stamp
-		 * to be different than write_stamp. It doesn't
-		 * matter what it is, as long as its different.
-		 */
-		if (!delta)
-			rb_time_set(&cpu_buffer->before_stamp, 0);
-
-		/*
 		 * If an event were to come in now, it would see that the
 		 * write_stamp and the before_stamp are different, and assume
 		 * that this event just added itself before updating
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9aebf904ff97..fbcd3bafb93e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2360,13 +2360,7 @@ int is_tracing_stopped(void)
 	return global_trace.stop_count;
 }
 
-/**
- * tracing_start - quick start of the tracer
- *
- * If tracing is enabled but was stopped by tracing_stop,
- * this will start the tracer back up.
- */
-void tracing_start(void)
+static void tracing_start_tr(struct trace_array *tr)
 {
 	struct trace_buffer *buffer;
 	unsigned long flags;
@@ -2374,119 +2368,83 @@ void tracing_start(void)
 	if (tracing_disabled)
 		return;
 
-	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
-	if (--global_trace.stop_count) {
-		if (global_trace.stop_count < 0) {
+	raw_spin_lock_irqsave(&tr->start_lock, flags);
+	if (--tr->stop_count) {
+		if (WARN_ON_ONCE(tr->stop_count < 0)) {
 			/* Someone screwed up their debugging */
-			WARN_ON_ONCE(1);
-			global_trace.stop_count = 0;
+			tr->stop_count = 0;
 		}
 		goto out;
 	}
 
 	/* Prevent the buffers from switching */
-	arch_spin_lock(&global_trace.max_lock);
+	arch_spin_lock(&tr->max_lock);
 
-	buffer = global_trace.array_buffer.buffer;
+	buffer = tr->array_buffer.buffer;
 	if (buffer)
 		ring_buffer_record_enable(buffer);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-	buffer = global_trace.max_buffer.buffer;
+	buffer = tr->max_buffer.buffer;
 	if (buffer)
 		ring_buffer_record_enable(buffer);
 #endif
 
-	arch_spin_unlock(&global_trace.max_lock);
-
- out:
-	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
-}
-
-static void tracing_start_tr(struct trace_array *tr)
-{
-	struct trace_buffer *buffer;
-	unsigned long flags;
-
-	if (tracing_disabled)
-		return;
-
-	/* If global, we need to also start the max tracer */
-	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
-		return tracing_start();
-
-	raw_spin_lock_irqsave(&tr->start_lock, flags);
-
-	if (--tr->stop_count) {
-		if (tr->stop_count < 0) {
-			/* Someone screwed up their debugging */
-			WARN_ON_ONCE(1);
-			tr->stop_count = 0;
-		}
-		goto out;
-	}
-
-	buffer = tr->array_buffer.buffer;
-	if (buffer)
-		ring_buffer_record_enable(buffer);
+	arch_spin_unlock(&tr->max_lock);
 
  out:
 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
 }
 
 /**
- * tracing_stop - quick stop of the tracer
+ * tracing_start - quick start of the tracer
  *
- * Light weight way to stop tracing. Use in conjunction with
- * tracing_start.
+ * If tracing is enabled but was stopped by tracing_stop,
+ * this will start the tracer back up.
  */
-void tracing_stop(void)
+void tracing_start(void)
+
+{
+	return tracing_start_tr(&global_trace);
+}
+
+static void tracing_stop_tr(struct trace_array *tr)
 {
 	struct trace_buffer *buffer;
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
-	if (global_trace.stop_count++)
+	raw_spin_lock_irqsave(&tr->start_lock, flags);
+	if (tr->stop_count++)
 		goto out;
 
 	/* Prevent the buffers from switching */
-	arch_spin_lock(&global_trace.max_lock);
+	arch_spin_lock(&tr->max_lock);
 
-	buffer = global_trace.array_buffer.buffer;
+	buffer = tr->array_buffer.buffer;
 	if (buffer)
 		ring_buffer_record_disable(buffer);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-	buffer = global_trace.max_buffer.buffer;
+	buffer = tr->max_buffer.buffer;
 	if (buffer)
 		ring_buffer_record_disable(buffer);
 #endif
 
-	arch_spin_unlock(&global_trace.max_lock);
+	arch_spin_unlock(&tr->max_lock);
 
  out:
-	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
+	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
 }
 
-static void tracing_stop_tr(struct trace_array *tr)
+/**
+ * tracing_stop - quick stop of the tracer
+ *
+ * Light weight way to stop tracing. Use in conjunction with
+ * tracing_start.
+ */
+void tracing_stop(void)
 {
-	struct trace_buffer *buffer;
-	unsigned long flags;
-
-	/* If global, we need to also stop the max tracer */
-	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
-		return tracing_stop();
-
-	raw_spin_lock_irqsave(&tr->start_lock, flags);
-	if (tr->stop_count++)
-		goto out;
-
-	buffer = tr->array_buffer.buffer;
-	if (buffer)
-		ring_buffer_record_disable(buffer);
-
- out:
-	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
+	return tracing_stop_tr(&global_trace);
 }
 
 static int trace_save_cmdline(struct task_struct *tsk)
@@ -2770,8 +2728,11 @@ void trace_buffered_event_enable(void)
 	for_each_tracing_cpu(cpu) {
 		page = alloc_pages_node(cpu_to_node(cpu),
 					GFP_KERNEL | __GFP_NORETRY, 0);
-		if (!page)
-			goto failed;
+		/* This is just an optimization and can handle failures */
+		if (!page) {
+			pr_err("Failed to allocate event buffer\n");
+			break;
+		}
 
 		event = page_address(page);
 		memset(event, 0, sizeof(*event));
@@ -2785,10 +2746,6 @@ void trace_buffered_event_enable(void)
 			WARN_ON_ONCE(1);
 		preempt_enable();
 	}
-
-	return;
- failed:
-	trace_buffered_event_disable();
 }
 
 static void enable_trace_buffered_event(void *data)
@@ -2823,11 +2780,9 @@ void trace_buffered_event_disable(void)
 	if (--trace_buffered_event_ref)
 		return;
 
-	preempt_disable();
 	/* For each CPU, set the buffer as used. */
-	smp_call_function_many(tracing_buffer_mask,
-			       disable_trace_buffered_event, NULL, 1);
-	preempt_enable();
+	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
+			 NULL, true);
 
 	/* Wait for all current users to finish */
 	synchronize_rcu();
@@ -2836,17 +2791,19 @@ void trace_buffered_event_disable(void)
 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
 		per_cpu(trace_buffered_event, cpu) = NULL;
 	}
+
 	/*
-	 * Make sure trace_buffered_event is NULL before clearing
-	 * trace_buffered_event_cnt.
+	 * Wait for all CPUs that potentially started checking if they can use
+	 * their event buffer only after the previous synchronize_rcu() call and
+	 * they still read a valid pointer from trace_buffered_event. It must be
+	 * ensured they don't see cleared trace_buffered_event_cnt else they
+	 * could wrongly decide to use the pointed-to buffer which is now freed.
 	 */
-	smp_wmb();
+	synchronize_rcu();
 
-	preempt_disable();
-	/* Do the work on each cpu */
-	smp_call_function_many(tracing_buffer_mask,
-			       enable_trace_buffered_event, NULL, 1);
-	preempt_enable();
+	/* For each CPU, relinquish the buffer */
+	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
+			 true);
 }
 
 static struct trace_buffer *temp_buffer;
@@ -6387,13 +6344,15 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
 	if (!tr->array_buffer.buffer)
 		return 0;
 
+	/* Do not allow tracing while resizng ring buffer */
+	tracing_stop_tr(tr);
+
 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
 	if (ret < 0)
-		return ret;
+		goto out_start;
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
-	    !tr->current_trace->use_max_tr)
+	if (!tr->current_trace->use_max_tr)
 		goto out;
 
 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
@@ -6418,7 +6377,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
 			WARN_ON(1);
 			tracing_disabled = 1;
 		}
-		return ret;
+		goto out_start;
 	}
 
 	update_buffer_entries(&tr->max_buffer, cpu);
@@ -6427,7 +6386,8 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
 	update_buffer_entries(&tr->array_buffer, cpu);
-
+ out_start:
+	tracing_start_tr(tr);
 	return ret;
 }
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6e578f576a6f..2989b57e154a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1684,9 +1684,6 @@ static int wq_select_unbound_cpu(int cpu)
 		pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n");
 	}
 
-	if (cpumask_empty(wq_unbound_cpumask))
-		return cpu;
-
 	new_cpu = __this_cpu_read(wq_rr_cpu_last);
 	new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
 	if (unlikely(new_cpu >= nr_cpu_ids)) {
@@ -6515,6 +6512,17 @@ static inline void wq_watchdog_init(void) { }
 
 #endif	/* CONFIG_WQ_WATCHDOG */
 
+static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask)
+{
+	if (!cpumask_intersects(wq_unbound_cpumask, mask)) {
+		pr_warn("workqueue: Restricting unbound_cpumask (%*pb) with %s (%*pb) leaves no CPU, ignoring\n",
+			cpumask_pr_args(wq_unbound_cpumask), name, cpumask_pr_args(mask));
+		return;
+	}
+
+	cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, mask);
+}
+
 /**
  * workqueue_init_early - early init for workqueue subsystem
  *
@@ -6534,11 +6542,11 @@ void __init workqueue_init_early(void)
 	BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
 
 	BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
-	cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_WQ));
-	cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));
-
+	cpumask_copy(wq_unbound_cpumask, cpu_possible_mask);
+	restrict_unbound_cpumask("HK_TYPE_WQ", housekeeping_cpumask(HK_TYPE_WQ));
+	restrict_unbound_cpumask("HK_TYPE_DOMAIN", housekeeping_cpumask(HK_TYPE_DOMAIN));
 	if (!cpumask_empty(&wq_cmdline_cpumask))
-		cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, &wq_cmdline_cpumask);
+		restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask);
 
 	pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
 
diff --git a/lib/closure.c b/lib/closure.c
index f86c9eeafb35..c16540552d61 100644
--- a/lib/closure.c
+++ b/lib/closure.c
@@ -36,7 +36,7 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
 			closure_debug_destroy(cl);
 
 			if (destructor)
-				destructor(cl);
+				destructor(&cl->work);
 
 			if (parent)
 				closure_put(parent);
@@ -108,8 +108,9 @@ struct closure_syncer {
 	int			done;
 };
 
-static void closure_sync_fn(struct closure *cl)
+static CLOSURE_CALLBACK(closure_sync_fn)
 {
+	struct closure *cl = container_of(ws, struct closure, work);
 	struct closure_syncer *s = cl->s;
 	struct task_struct *p;
 
diff --git a/lib/fw_table.c b/lib/fw_table.c
index b51f30a28e47..294df54e33b6 100644
--- a/lib/fw_table.c
+++ b/lib/fw_table.c
@@ -7,7 +7,7 @@
  *  Copyright (C) 2023 Intel Corp.
  */
 #include <linux/errno.h>
-#include <linux/fw_table.h>
+#include <linux/acpi.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
diff --git a/lib/group_cpus.c b/lib/group_cpus.c
index aa3f6815bb12..ee272c4cefcc 100644
--- a/lib/group_cpus.c
+++ b/lib/group_cpus.c
@@ -366,13 +366,25 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps)
 	if (!masks)
 		goto fail_node_to_cpumask;
 
-	/* Stabilize the cpumasks */
-	cpus_read_lock();
 	build_node_to_cpumask(node_to_cpumask);
 
+	/*
+	 * Make a local cache of 'cpu_present_mask', so the two stages
+	 * spread can observe consistent 'cpu_present_mask' without holding
+	 * cpu hotplug lock, then we can reduce deadlock risk with cpu
+	 * hotplug code.
+	 *
+	 * Here CPU hotplug may happen when reading `cpu_present_mask`, and
+	 * we can live with the case because it only affects that hotplug
+	 * CPU is handled in the 1st or 2nd stage, and either way is correct
+	 * from API user viewpoint since 2-stage spread is sort of
+	 * optimization.
+	 */
+	cpumask_copy(npresmsk, data_race(cpu_present_mask));
+
 	/* grouping present CPUs first */
 	ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
-				  cpu_present_mask, nmsk, masks);
+				  npresmsk, nmsk, masks);
 	if (ret < 0)
 		goto fail_build_affinity;
 	nr_present = ret;
@@ -387,15 +399,13 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps)
 		curgrp = 0;
 	else
 		curgrp = nr_present;
-	cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
+	cpumask_andnot(npresmsk, cpu_possible_mask, npresmsk);
 	ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
 				  npresmsk, nmsk, masks);
 	if (ret >= 0)
 		nr_others = ret;
 
  fail_build_affinity:
-	cpus_read_unlock();
-
 	if (ret >= 0)
 		WARN_ON(nr_present + nr_others < numgrps);
 
diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c
index 99d2a3a528e1..de2113a58fa0 100644
--- a/lib/kunit/kunit-test.c
+++ b/lib/kunit/kunit-test.c
@@ -562,7 +562,7 @@ static void kunit_log_test(struct kunit *test)
 	KUNIT_EXPECT_TRUE(test, test->log->append_newlines);
 
 	full_log = string_stream_get_string(test->log);
-	kunit_add_action(test, (kunit_action_t *)kfree, full_log);
+	kunit_add_action(test, kfree_wrapper, full_log);
 	KUNIT_EXPECT_NOT_ERR_OR_NULL(test,
 				     strstr(full_log, "put this in log."));
 	KUNIT_EXPECT_NOT_ERR_OR_NULL(test,
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index f2eb71f1a66c..7aceb07a1af9 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -338,6 +338,36 @@ void kunit_init_test(struct kunit *test, const char *name, struct string_stream
 }
 EXPORT_SYMBOL_GPL(kunit_init_test);
 
+/* Only warn when a test takes more than twice the threshold */
+#define KUNIT_SPEED_WARNING_MULTIPLIER	2
+
+/* Slow tests are defined as taking more than 1s */
+#define KUNIT_SPEED_SLOW_THRESHOLD_S	1
+
+#define KUNIT_SPEED_SLOW_WARNING_THRESHOLD_S	\
+	(KUNIT_SPEED_WARNING_MULTIPLIER * KUNIT_SPEED_SLOW_THRESHOLD_S)
+
+#define s_to_timespec64(s) ns_to_timespec64((s) * NSEC_PER_SEC)
+
+static void kunit_run_case_check_speed(struct kunit *test,
+				       struct kunit_case *test_case,
+				       struct timespec64 duration)
+{
+	struct timespec64 slow_thr =
+		s_to_timespec64(KUNIT_SPEED_SLOW_WARNING_THRESHOLD_S);
+	enum kunit_speed speed = test_case->attr.speed;
+
+	if (timespec64_compare(&duration, &slow_thr) < 0)
+		return;
+
+	if (speed == KUNIT_SPEED_VERY_SLOW || speed == KUNIT_SPEED_SLOW)
+		return;
+
+	kunit_warn(test,
+		   "Test should be marked slow (runtime: %lld.%09lds)",
+		   duration.tv_sec, duration.tv_nsec);
+}
+
 /*
  * Initializes and runs test case. Does not clean up or do post validations.
  */
@@ -345,6 +375,8 @@ static void kunit_run_case_internal(struct kunit *test,
 				    struct kunit_suite *suite,
 				    struct kunit_case *test_case)
 {
+	struct timespec64 start, end;
+
 	if (suite->init) {
 		int ret;
 
@@ -356,7 +388,13 @@ static void kunit_run_case_internal(struct kunit *test,
 		}
 	}
 
+	ktime_get_ts64(&start);
+
 	test_case->run_case(test);
+
+	ktime_get_ts64(&end);
+
+	kunit_run_case_check_speed(test, test_case, timespec64_sub(end, start));
 }
 
 static void kunit_case_internal_cleanup(struct kunit *test)
@@ -670,6 +708,8 @@ int __kunit_test_suites_init(struct kunit_suite * const * const suites, int num_
 		return 0;
 	}
 
+	kunit_suite_counter = 1;
+
 	static_branch_inc(&kunit_running);
 
 	for (i = 0; i < num_suites; i++) {
@@ -696,8 +736,6 @@ void __kunit_test_suites_exit(struct kunit_suite **suites, int num_suites)
 
 	for (i = 0; i < num_suites; i++)
 		kunit_exit_suite(suites[i]);
-
-	kunit_suite_counter = 1;
 }
 EXPORT_SYMBOL_GPL(__kunit_test_suites_exit);
 
diff --git a/lib/objpool.c b/lib/objpool.c
index ce0087f64400..cfdc02420884 100644
--- a/lib/objpool.c
+++ b/lib/objpool.c
@@ -201,6 +201,23 @@ static inline void *objpool_try_get_slot(struct objpool_head *pool, int cpu)
 	while (head != READ_ONCE(slot->last)) {
 		void *obj;
 
+		/*
+		 * data visibility of 'last' and 'head' could be out of
+		 * order since memory updating of 'last' and 'head' are
+		 * performed in push() and pop() independently
+		 *
+		 * before any retrieving attempts, pop() must guarantee
+		 * 'last' is behind 'head', that is to say, there must
+		 * be available objects in slot, which could be ensured
+		 * by condition 'last != head && last - head <= nr_objs'
+		 * that is equivalent to 'last - head - 1 < nr_objs' as
+		 * 'last' and 'head' are both unsigned int32
+		 */
+		if (READ_ONCE(slot->last) - head - 1 >= pool->nr_objs) {
+			head = READ_ONCE(slot->head);
+			continue;
+		}
+
 		/* obj must be retrieved before moving forward head */
 		obj = READ_ONCE(slot->entries[head & slot->mask]);
 
diff --git a/mm/Kconfig b/mm/Kconfig
index 89971a894b60..57cd378c73d6 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1201,13 +1201,6 @@ config ANON_VMA_NAME
 	  area from being merged with adjacent virtual memory areas due to the
 	  difference in their name.
 
-config USERFAULTFD
-	bool "Enable userfaultfd() system call"
-	depends on MMU
-	help
-	  Enable the userfaultfd() system call that allows to intercept and
-	  handle page faults in userland.
-
 config HAVE_ARCH_USERFAULTFD_WP
 	bool
 	help
@@ -1218,6 +1211,14 @@ config HAVE_ARCH_USERFAULTFD_MINOR
 	help
 	  Arch has userfaultfd minor fault support
 
+menuconfig USERFAULTFD
+	bool "Enable userfaultfd() system call"
+	depends on MMU
+	help
+	  Enable the userfaultfd() system call that allows to intercept and
+	  handle page faults in userland.
+
+if USERFAULTFD
 config PTE_MARKER_UFFD_WP
 	bool "Userfaultfd write protection support for shmem/hugetlbfs"
 	default y
@@ -1227,6 +1228,7 @@ config PTE_MARKER_UFFD_WP
 	  Allows to create marker PTEs for userfaultfd write protection
 	  purposes.  It is required to enable userfaultfd write protection on
 	  file-backed memory types like shmem and hugetlbfs.
+endif # USERFAULTFD
 
 # multi-gen LRU {
 config LRU_GEN
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 6262d55904e7..ce1562783e7e 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1225,6 +1225,7 @@ static void damon_split_region_at(struct damon_target *t,
 	new->age = r->age;
 	new->last_nr_accesses = r->last_nr_accesses;
 	new->nr_accesses_bp = r->nr_accesses_bp;
+	new->nr_accesses = r->nr_accesses;
 
 	damon_insert_region(new, r, damon_next_region(r), t);
 }
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index be667236b8e6..fe0fe2562000 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -139,6 +139,13 @@ static const struct kobj_type damon_sysfs_scheme_region_ktype = {
  * damon_sysfs_before_damos_apply() understands the situation by showing the
  * 'finished' status and do nothing.
  *
+ * If DAMOS is not applied to any region due to any reasons including the
+ * access pattern, the watermarks, the quotas, and the filters,
+ * ->before_damos_apply() will not be called back.  Until the situation is
+ * changed, the update will not be finished.  To avoid this,
+ * damon_sysfs_after_sampling() set the status as 'finished' if more than two
+ * apply intervals of the scheme is passed while the state is 'idle'.
+ *
  *  Finally, the tried regions request handling finisher function
  *  (damon_sysfs_schemes_update_regions_stop()) unregisters the callbacks.
  */
@@ -154,6 +161,7 @@ struct damon_sysfs_scheme_regions {
 	int nr_regions;
 	unsigned long total_bytes;
 	enum damos_sysfs_regions_upd_status upd_status;
+	unsigned long upd_timeout_jiffies;
 };
 
 static struct damon_sysfs_scheme_regions *
@@ -1854,7 +1862,9 @@ static int damon_sysfs_after_sampling(struct damon_ctx *ctx)
 	for (i = 0; i < sysfs_schemes->nr; i++) {
 		sysfs_regions = sysfs_schemes->schemes_arr[i]->tried_regions;
 		if (sysfs_regions->upd_status ==
-				DAMOS_TRIED_REGIONS_UPD_STARTED)
+				DAMOS_TRIED_REGIONS_UPD_STARTED ||
+				time_after(jiffies,
+					sysfs_regions->upd_timeout_jiffies))
 			sysfs_regions->upd_status =
 				DAMOS_TRIED_REGIONS_UPD_FINISHED;
 	}
@@ -1885,14 +1895,41 @@ int damon_sysfs_schemes_clear_regions(
 	return 0;
 }
 
+static struct damos *damos_sysfs_nth_scheme(int n, struct damon_ctx *ctx)
+{
+	struct damos *scheme;
+	int i = 0;
+
+	damon_for_each_scheme(scheme, ctx) {
+		if (i == n)
+			return scheme;
+		i++;
+	}
+	return NULL;
+}
+
 static void damos_tried_regions_init_upd_status(
-		struct damon_sysfs_schemes *sysfs_schemes)
+		struct damon_sysfs_schemes *sysfs_schemes,
+		struct damon_ctx *ctx)
 {
 	int i;
+	struct damos *scheme;
+	struct damon_sysfs_scheme_regions *sysfs_regions;
 
-	for (i = 0; i < sysfs_schemes->nr; i++)
-		sysfs_schemes->schemes_arr[i]->tried_regions->upd_status =
-			DAMOS_TRIED_REGIONS_UPD_IDLE;
+	for (i = 0; i < sysfs_schemes->nr; i++) {
+		sysfs_regions = sysfs_schemes->schemes_arr[i]->tried_regions;
+		scheme = damos_sysfs_nth_scheme(i, ctx);
+		if (!scheme) {
+			sysfs_regions->upd_status =
+				DAMOS_TRIED_REGIONS_UPD_FINISHED;
+			continue;
+		}
+		sysfs_regions->upd_status = DAMOS_TRIED_REGIONS_UPD_IDLE;
+		sysfs_regions->upd_timeout_jiffies = jiffies +
+			2 * usecs_to_jiffies(scheme->apply_interval_us ?
+					scheme->apply_interval_us :
+					ctx->attrs.sample_interval);
+	}
 }
 
 /* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */
@@ -1902,7 +1939,7 @@ int damon_sysfs_schemes_update_regions_start(
 {
 	damon_sysfs_schemes_clear_regions(sysfs_schemes, ctx);
 	damon_sysfs_schemes_for_damos_callback = sysfs_schemes;
-	damos_tried_regions_init_upd_status(sysfs_schemes);
+	damos_tried_regions_init_upd_status(sysfs_schemes, ctx);
 	damos_regions_upd_total_bytes_only = total_bytes_only;
 	ctx->callback.before_damos_apply = damon_sysfs_before_damos_apply;
 	ctx->callback.after_sampling = damon_sysfs_after_sampling;
diff --git a/mm/filemap.c b/mm/filemap.c
index 32eedf3afd45..f1c8c278310f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3371,7 +3371,7 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct folio *folio,
 		}
 	}
 
-	if (pmd_none(*vmf->pmd))
+	if (pmd_none(*vmf->pmd) && vmf->prealloc_pte)
 		pmd_install(mm, vmf->pmd, &vmf->prealloc_pte);
 
 	return false;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1169ef2f2176..6feb3e0630d1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1182,6 +1182,13 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
 	return (get_vma_private_data(vma) & flag) != 0;
 }
 
+bool __vma_private_lock(struct vm_area_struct *vma)
+{
+	return !(vma->vm_flags & VM_MAYSHARE) &&
+		get_vma_private_data(vma) & ~HPAGE_RESV_MASK &&
+		is_vma_resv_set(vma, HPAGE_RESV_OWNER);
+}
+
 void hugetlb_dup_vma_private(struct vm_area_struct *vma)
 {
 	VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 1eacca03bedd..5501363d6b31 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -642,32 +642,16 @@ static struct kmemleak_object *__alloc_object(gfp_t gfp)
 	if (!object) {
 		pr_warn("Cannot allocate a kmemleak_object structure\n");
 		kmemleak_disable();
+		return NULL;
 	}
 
-	return object;
-}
-
-static int __link_object(struct kmemleak_object *object, unsigned long ptr,
-			 size_t size, int min_count, bool is_phys)
-{
-
-	struct kmemleak_object *parent;
-	struct rb_node **link, *rb_parent;
-	unsigned long untagged_ptr;
-	unsigned long untagged_objp;
-
 	INIT_LIST_HEAD(&object->object_list);
 	INIT_LIST_HEAD(&object->gray_list);
 	INIT_HLIST_HEAD(&object->area_list);
 	raw_spin_lock_init(&object->lock);
 	atomic_set(&object->use_count, 1);
-	object->flags = OBJECT_ALLOCATED | (is_phys ? OBJECT_PHYS : 0);
-	object->pointer = ptr;
-	object->size = kfence_ksize((void *)ptr) ?: size;
 	object->excess_ref = 0;
-	object->min_count = min_count;
 	object->count = 0;			/* white color initially */
-	object->jiffies = jiffies;
 	object->checksum = 0;
 	object->del_state = 0;
 
@@ -692,6 +676,24 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr,
 	/* kernel backtrace */
 	object->trace_handle = set_track_prepare();
 
+	return object;
+}
+
+static int __link_object(struct kmemleak_object *object, unsigned long ptr,
+			 size_t size, int min_count, bool is_phys)
+{
+
+	struct kmemleak_object *parent;
+	struct rb_node **link, *rb_parent;
+	unsigned long untagged_ptr;
+	unsigned long untagged_objp;
+
+	object->flags = OBJECT_ALLOCATED | (is_phys ? OBJECT_PHYS : 0);
+	object->pointer = ptr;
+	object->size = kfence_ksize((void *)ptr) ?: size;
+	object->min_count = min_count;
+	object->jiffies = jiffies;
+
 	untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
 	/*
 	 * Only update min_addr and max_addr with object
@@ -1150,6 +1152,7 @@ EXPORT_SYMBOL_GPL(kmemleak_free_percpu);
 void __ref kmemleak_update_trace(const void *ptr)
 {
 	struct kmemleak_object *object;
+	depot_stack_handle_t trace_handle;
 	unsigned long flags;
 
 	pr_debug("%s(0x%px)\n", __func__, ptr);
@@ -1166,8 +1169,9 @@ void __ref kmemleak_update_trace(const void *ptr)
 		return;
 	}
 
+	trace_handle = set_track_prepare();
 	raw_spin_lock_irqsave(&object->lock, flags);
-	object->trace_handle = set_track_prepare();
+	object->trace_handle = trace_handle;
 	raw_spin_unlock_irqrestore(&object->lock, flags);
 
 	put_object(object);
diff --git a/mm/madvise.c b/mm/madvise.c
index cf4d694280e9..6214a1ab5654 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -335,6 +335,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 	struct folio *folio = NULL;
 	LIST_HEAD(folio_list);
 	bool pageout_anon_only_filter;
+	unsigned int batch_count = 0;
 
 	if (fatal_signal_pending(current))
 		return -EINTR;
@@ -416,6 +417,7 @@ huge_unlock:
 regular_folio:
 #endif
 	tlb_change_page_size(tlb, PAGE_SIZE);
+restart:
 	start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	if (!start_pte)
 		return 0;
@@ -424,6 +426,15 @@ regular_folio:
 	for (; addr < end; pte++, addr += PAGE_SIZE) {
 		ptent = ptep_get(pte);
 
+		if (++batch_count == SWAP_CLUSTER_MAX) {
+			batch_count = 0;
+			if (need_resched()) {
+				pte_unmap_unlock(start_pte, ptl);
+				cond_resched();
+				goto restart;
+			}
+		}
+
 		if (pte_none(ptent))
 			continue;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1c1061df9cd1..b226090fd906 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3166,6 +3166,7 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 	return NULL;
 
 from_memcg:
+	objcg = NULL;
 	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
 		/*
 		 * Memcg pointer is protected by scope (see set_active_memcg())
@@ -3176,7 +3177,6 @@ from_memcg:
 		objcg = rcu_dereference_check(memcg->objcg, 1);
 		if (likely(objcg))
 			break;
-		objcg = NULL;
 	}
 
 	return objcg;
diff --git a/mm/memory.c b/mm/memory.c
index 1f18ed4a5497..5c757fba8858 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1517,6 +1517,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				continue;
 		} else {
 			/* We should have covered all the swap entry types */
+			pr_alert("unrecognized swap entry 0x%lx\n", entry.val);
 			WARN_ON_ONCE(1);
 		}
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ab41a511e20a..7a5fc89a8652 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1129,6 +1129,9 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
 	kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
 }
 
+/*
+ * Must be called with mem_hotplug_lock in write mode.
+ */
 int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
 		       struct zone *zone, struct memory_group *group)
 {
@@ -1149,7 +1152,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
 			 !IS_ALIGNED(pfn + nr_pages, PAGES_PER_SECTION)))
 		return -EINVAL;
 
-	mem_hotplug_begin();
 
 	/* associate pfn range with the zone */
 	move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
@@ -1208,7 +1210,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
 	writeback_set_ratelimit();
 
 	memory_notify(MEM_ONLINE, &arg);
-	mem_hotplug_done();
 	return 0;
 
 failed_addition:
@@ -1217,7 +1218,6 @@ failed_addition:
 		 (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
 	memory_notify(MEM_CANCEL_ONLINE, &arg);
 	remove_pfn_range_from_zone(zone, pfn, nr_pages);
-	mem_hotplug_done();
 	return ret;
 }
 
@@ -1458,7 +1458,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 	/* create memory block devices after memory was added */
 	ret = create_memory_block_devices(start, size, params.altmap, group);
 	if (ret) {
-		arch_remove_memory(start, size, NULL);
+		arch_remove_memory(start, size, params.altmap);
 		goto error_free;
 	}
 
@@ -1863,6 +1863,9 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
 	return 0;
 }
 
+/*
+ * Must be called with mem_hotplug_lock in write mode.
+ */
 int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
 			struct zone *zone, struct memory_group *group)
 {
@@ -1885,8 +1888,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
 			 !IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION)))
 		return -EINVAL;
 
-	mem_hotplug_begin();
-
 	/*
 	 * Don't allow to offline memory blocks that contain holes.
 	 * Consequently, memory blocks with holes can never get onlined
@@ -2031,7 +2032,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
 
 	memory_notify(MEM_OFFLINE, &arg);
 	remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
-	mem_hotplug_done();
 	return 0;
 
 failed_removal_isolated:
@@ -2046,7 +2046,6 @@ failed_removal:
 		 (unsigned long long) start_pfn << PAGE_SHIFT,
 		 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
 		 reason);
-	mem_hotplug_done();
 	return ret;
 }
 
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index aff31cd944c2..b240d9aae4a6 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -183,7 +183,7 @@ out:
 }
 
 static const struct genl_multicast_group dropmon_mcgrps[] = {
-	{ .name = "events", },
+	{ .name = "events", .cap_sys_admin = 1 },
 };
 
 static void send_dm_alert(struct work_struct *work)
@@ -1619,11 +1619,13 @@ static const struct genl_small_ops dropmon_ops[] = {
 		.cmd = NET_DM_CMD_START,
 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = net_dm_cmd_trace,
+		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = NET_DM_CMD_STOP,
 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = net_dm_cmd_trace,
+		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = NET_DM_CMD_CONFIG_GET,
diff --git a/net/core/filter.c b/net/core/filter.c
index 7e4d7c3bcc84..1737884be52f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2602,6 +2602,22 @@ BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
 	return 0;
 }
 
+static void sk_msg_reset_curr(struct sk_msg *msg)
+{
+	u32 i = msg->sg.start;
+	u32 len = 0;
+
+	do {
+		len += sk_msg_elem(msg, i)->length;
+		sk_msg_iter_var_next(i);
+		if (len >= msg->sg.size)
+			break;
+	} while (i != msg->sg.end);
+
+	msg->sg.curr = i;
+	msg->sg.copybreak = 0;
+}
+
 static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
 	.func           = bpf_msg_cork_bytes,
 	.gpl_only       = false,
@@ -2721,6 +2737,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
 		      msg->sg.end - shift + NR_MSG_FRAG_IDS :
 		      msg->sg.end - shift;
 out:
+	sk_msg_reset_curr(msg);
 	msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
 	msg->data_end = msg->data + bytes;
 	return 0;
@@ -2857,6 +2874,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
 		msg->sg.data[new] = rsge;
 	}
 
+	sk_msg_reset_curr(msg);
 	sk_msg_compute_data_pointers(msg);
 	return 0;
 }
@@ -3025,6 +3043,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
 
 	sk_mem_uncharge(msg->sk, len - pop);
 	msg->sg.size -= (len - pop);
+	sk_msg_reset_curr(msg);
 	sk_msg_compute_data_pointers(msg);
 	return 0;
 }
diff --git a/net/core/scm.c b/net/core/scm.c
index 880027ecf516..7dc47c17d863 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -26,6 +26,7 @@
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
 #include <linux/errqueue.h>
+#include <linux/io_uring.h>
 
 #include <linux/uaccess.h>
 
@@ -103,6 +104,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
 
 		if (fd < 0 || !(file = fget_raw(fd)))
 			return -EBADF;
+		/* don't allow io_uring files */
+		if (io_uring_get_socket(file)) {
+			fput(file);
+			return -EINVAL;
+		}
 		*fpp++ = file;
 		fpl->count++;
 	}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 6c31eefbd777..93ecfceac1bc 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -826,6 +826,8 @@ static void sk_psock_destroy(struct work_struct *work)
 
 	if (psock->sk_redir)
 		sock_put(psock->sk_redir);
+	if (psock->sk_pair)
+		sock_put(psock->sk_pair);
 	sock_put(psock->sk);
 	kfree(psock);
 }
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 3bbd5afb7b31..fe3553f60bf3 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -505,6 +505,7 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
 				ret = skb->len;
 			break;
 		}
+		ret = 0;
 	}
 	rtnl_unlock();
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 76c3ea75b8dd..efeeca2b1328 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -216,8 +216,10 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
 	int tv = get_random_u32_below(max_delay);
 
 	im->tm_running = 1;
-	if (!mod_timer(&im->timer, jiffies+tv+2))
-		refcount_inc(&im->refcnt);
+	if (refcount_inc_not_zero(&im->refcnt)) {
+		if (mod_timer(&im->timer, jiffies + tv + 2))
+			ip_ma_put(im);
+	}
 }
 
 static void igmp_gq_start_timer(struct in_device *in_dev)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 22a26d1d29a0..5169c3c72cff 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -635,15 +635,18 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 	}
 
 	if (dev->header_ops) {
+		int pull_len = tunnel->hlen + sizeof(struct iphdr);
+
 		if (skb_cow_head(skb, 0))
 			goto free_skb;
 
 		tnl_params = (const struct iphdr *)skb->data;
 
-		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
-		 * to gre header.
-		 */
-		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+		if (!pskb_network_may_pull(skb, pull_len))
+			goto free_skb;
+
+		/* ip_tunnel_xmit() needs skb->data pointing to gre header. */
+		skb_pull(skb, pull_len);
 		skb_reset_mac_header(skb);
 
 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 53bcc17c91e4..ff6838ca2e58 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3368,9 +3368,25 @@ int tcp_set_window_clamp(struct sock *sk, int val)
 			return -EINVAL;
 		tp->window_clamp = 0;
 	} else {
-		tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
-			SOCK_MIN_RCVBUF / 2 : val;
-		tp->rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp);
+		u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp;
+		u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
+						SOCK_MIN_RCVBUF / 2 : val;
+
+		if (new_window_clamp == old_window_clamp)
+			return 0;
+
+		tp->window_clamp = new_window_clamp;
+		if (new_window_clamp < old_window_clamp) {
+			/* need to apply the reserved mem provisioning only
+			 * when shrinking the window clamp
+			 */
+			__tcp_adjust_rcv_ssthresh(sk, tp->window_clamp);
+
+		} else {
+			new_rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp);
+			tp->rcv_ssthresh = max(new_rcv_ssthresh,
+					       tp->rcv_ssthresh);
+		}
 	}
 	return 0;
 }
@@ -3594,6 +3610,10 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
 		break;
 
 	case TCP_AO_REPAIR:
+		if (!tcp_can_repair_sock(sk)) {
+			err = -EPERM;
+			break;
+		}
 		err = tcp_ao_set_repair(sk, optval, optlen);
 		break;
 #ifdef CONFIG_TCP_AO
@@ -4293,6 +4313,8 @@ zerocopy_rcv_out:
 	}
 #endif
 	case TCP_AO_REPAIR:
+		if (!tcp_can_repair_sock(sk))
+			return -EPERM;
 		return tcp_ao_get_repair(sk, optval, optlen);
 	case TCP_AO_GET_KEYS:
 	case TCP_AO_INFO: {
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index 7696417d0640..f8308d3f565e 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -851,7 +851,7 @@ void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
 	const struct tcp_ao_hdr *aoh;
 	struct tcp_ao_key *key;
 
-	treq->maclen = 0;
+	treq->used_tcp_ao = false;
 
 	if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh)
 		return;
@@ -863,7 +863,7 @@ void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
 
 	treq->ao_rcv_next = aoh->keyid;
 	treq->ao_keyid = aoh->rnext_keyid;
-	treq->maclen = tcp_ao_maclen(key);
+	treq->used_tcp_ao = true;
 }
 
 static enum skb_drop_reason
@@ -1100,7 +1100,7 @@ void tcp_ao_connect_init(struct sock *sk)
 			ao_info->current_key = key;
 		if (!ao_info->rnext_key)
 			ao_info->rnext_key = key;
-		tp->tcp_header_len += tcp_ao_len(key);
+		tp->tcp_header_len += tcp_ao_len_aligned(key);
 
 		ao_info->lisn = htonl(tp->write_seq);
 		ao_info->snd_sne = 0;
@@ -1346,7 +1346,7 @@ static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key)
 	syn_tcp_option_space -= TCPOLEN_MSS_ALIGNED;
 	syn_tcp_option_space -= TCPOLEN_TSTAMP_ALIGNED;
 	syn_tcp_option_space -= TCPOLEN_WSCALE_ALIGNED;
-	if (tcp_ao_len(key) > syn_tcp_option_space) {
+	if (tcp_ao_len_aligned(key) > syn_tcp_option_space) {
 		err = -EMSGSIZE;
 		goto err_kfree;
 	}
@@ -1608,6 +1608,15 @@ static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family,
 		if (!dev || !l3index)
 			return -EINVAL;
 
+		if (!bound_dev_if || bound_dev_if != cmd.ifindex) {
+			/* tcp_ao_established_key() doesn't expect having
+			 * non peer-matching key on an established TCP-AO
+			 * connection.
+			 */
+			if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)))
+				return -EINVAL;
+		}
+
 		/* It's still possible to bind after adding keys or even
 		 * re-bind to a different dev (with CAP_NET_RAW).
 		 * So, no reason to return error here, rather try to be
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bcb55d98004c..90de838a2745 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3871,8 +3871,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	 * then we can probably ignore it.
 	 */
 	if (before(ack, prior_snd_una)) {
+		u32 max_window;
+
+		/* do not accept ACK for bytes we never sent. */
+		max_window = min_t(u64, tp->max_window, tp->bytes_acked);
 		/* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
-		if (before(ack, prior_snd_una - tp->max_window)) {
+		if (before(ack, prior_snd_una - max_window)) {
 			if (!(flag & FLAG_NO_CHALLENGE_ACK))
 				tcp_send_challenge_ack(sk);
 			return -SKB_DROP_REASON_TCP_TOO_OLD_ACK;
@@ -7182,11 +7186,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
 		goto drop_and_release; /* Invalid TCP options */
 	if (aoh) {
-		tcp_rsk(req)->maclen = aoh->length - sizeof(struct tcp_ao_hdr);
+		tcp_rsk(req)->used_tcp_ao = true;
 		tcp_rsk(req)->ao_rcv_next = aoh->keyid;
 		tcp_rsk(req)->ao_keyid = aoh->rnext_keyid;
+
 	} else {
-		tcp_rsk(req)->maclen = 0;
+		tcp_rsk(req)->used_tcp_ao = false;
 	}
 #endif
 	tcp_rsk(req)->snt_isn = isn;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5f693bbd578d..0c50c5a32b84 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -690,7 +690,7 @@ static bool tcp_v4_ao_sign_reset(const struct sock *sk, struct sk_buff *skb,
 
 	reply_options[0] = htonl((TCPOPT_AO << 24) | (tcp_ao_len(key) << 16) |
 				 (aoh->rnext_keyid << 8) | keyid);
-	arg->iov[0].iov_len += round_up(tcp_ao_len(key), 4);
+	arg->iov[0].iov_len += tcp_ao_len_aligned(key);
 	reply->doff = arg->iov[0].iov_len / 4;
 
 	if (tcp_ao_hash_hdr(AF_INET, (char *)&reply_options[1],
@@ -978,7 +978,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
 					  (tcp_ao_len(key->ao_key) << 16) |
 					  (key->ao_key->sndid << 8) |
 					  key->rcv_next);
-		arg.iov[0].iov_len += round_up(tcp_ao_len(key->ao_key), 4);
+		arg.iov[0].iov_len += tcp_ao_len_aligned(key->ao_key);
 		rep.th.doff = arg.iov[0].iov_len / 4;
 
 		tcp_ao_hash_hdr(AF_INET, (char *)&rep.opt[offset],
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a9807eeb311c..9e85f2a0bddd 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -615,7 +615,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 	ao_key = treq->af_specific->ao_lookup(sk, req,
 				tcp_rsk(req)->ao_keyid, -1);
 	if (ao_key)
-		newtp->tcp_header_len += tcp_ao_len(ao_key);
+		newtp->tcp_header_len += tcp_ao_len_aligned(ao_key);
  #endif
 	if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
 		newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index eb13a55d660c..f5ef15e1d9ac 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -825,7 +825,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 		timestamps = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps);
 		if (tcp_key_is_ao(key)) {
 			opts->options |= OPTION_AO;
-			remaining -= tcp_ao_len(key->ao_key);
+			remaining -= tcp_ao_len_aligned(key->ao_key);
 		}
 	}
 
@@ -915,7 +915,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
 			ireq->tstamp_ok &= !ireq->sack_ok;
 	} else if (tcp_key_is_ao(key)) {
 		opts->options |= OPTION_AO;
-		remaining -= tcp_ao_len(key->ao_key);
+		remaining -= tcp_ao_len_aligned(key->ao_key);
 		ireq->tstamp_ok &= !ireq->sack_ok;
 	}
 
@@ -982,7 +982,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
 		size += TCPOLEN_MD5SIG_ALIGNED;
 	} else if (tcp_key_is_ao(key)) {
 		opts->options |= OPTION_AO;
-		size += tcp_ao_len(key->ao_key);
+		size += tcp_ao_len_aligned(key->ao_key);
 	}
 
 	if (likely(tp->rx_opt.tstamp_ok)) {
@@ -3720,7 +3720,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	if (tcp_rsk_used_ao(req)) {
 #ifdef CONFIG_TCP_AO
 		struct tcp_ao_key *ao_key = NULL;
-		u8 maclen = tcp_rsk(req)->maclen;
 		u8 keyid = tcp_rsk(req)->ao_keyid;
 
 		ao_key = tcp_sk(sk)->af_specific->ao_lookup(sk, req_to_sk(req),
@@ -3730,13 +3729,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 		 * for another peer-matching key, but the peer has requested
 		 * ao_keyid (RFC5925 RNextKeyID), so let's keep it simple here.
 		 */
-		if (unlikely(!ao_key || tcp_ao_maclen(ao_key) != maclen)) {
-			u8 key_maclen = ao_key ? tcp_ao_maclen(ao_key) : 0;
-
+		if (unlikely(!ao_key)) {
 			rcu_read_unlock();
 			kfree_skb(skb);
-			net_warn_ratelimited("TCP-AO: the keyid %u with maclen %u|%u from SYN packet is not present - not sending SYNACK\n",
-					     keyid, maclen, key_maclen);
+			net_warn_ratelimited("TCP-AO: the keyid %u from SYN packet is not present - not sending SYNACK\n",
+					     keyid);
 			return NULL;
 		}
 		key.ao_key = ao_key;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 28b01a068412..7772f42ff2b9 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1511,13 +1511,9 @@ out:
 			if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
 				pn_leaf = fib6_find_prefix(info->nl_net, table,
 							   pn);
-#if RT6_DEBUG >= 2
-				if (!pn_leaf) {
-					WARN_ON(!pn_leaf);
+				if (!pn_leaf)
 					pn_leaf =
 					    info->nl_net->ipv6.fib6_null_entry;
-				}
-#endif
 				fib6_info_hold(pn_leaf);
 				rcu_assign_pointer(pn->leaf, pn_leaf);
 			}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 937a02c2e534..8c6623496dd7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -881,7 +881,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
 	if (tcp_key_is_md5(key))
 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
 	if (tcp_key_is_ao(key))
-		tot_len += tcp_ao_len(key->ao_key);
+		tot_len += tcp_ao_len_aligned(key->ao_key);
 
 #ifdef CONFIG_MPTCP
 	if (rst && !tcp_key_is_md5(key)) {
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 037ab74f5ade..cb0291decf2e 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -88,7 +88,7 @@ config MAC80211_LEDS
 
 config MAC80211_DEBUGFS
 	bool "Export mac80211 internals in DebugFS"
-	depends on MAC80211 && DEBUG_FS
+	depends on MAC80211 && CFG80211_DEBUGFS
 	help
 	  Select this to see extensive information about
 	  the internal state of mac80211 in debugfs.
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index ec91e131b29e..80aeb25f1b68 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -22,88 +22,148 @@
 #include "debugfs_netdev.h"
 #include "driver-ops.h"
 
+struct ieee80211_if_read_sdata_data {
+	ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int);
+	struct ieee80211_sub_if_data *sdata;
+};
+
+static ssize_t ieee80211_if_read_sdata_handler(struct wiphy *wiphy,
+					       struct file *file,
+					       char *buf,
+					       size_t bufsize,
+					       void *data)
+{
+	struct ieee80211_if_read_sdata_data *d = data;
+
+	return d->format(d->sdata, buf, bufsize);
+}
+
 static ssize_t ieee80211_if_read_sdata(
-	struct ieee80211_sub_if_data *sdata,
+	struct file *file,
 	char __user *userbuf,
 	size_t count, loff_t *ppos,
 	ssize_t (*format)(const struct ieee80211_sub_if_data *sdata, char *, int))
 {
+	struct ieee80211_sub_if_data *sdata = file->private_data;
+	struct ieee80211_if_read_sdata_data data = {
+		.format = format,
+		.sdata = sdata,
+	};
 	char buf[200];
-	ssize_t ret = -EINVAL;
 
-	wiphy_lock(sdata->local->hw.wiphy);
-	ret = (*format)(sdata, buf, sizeof(buf));
-	wiphy_unlock(sdata->local->hw.wiphy);
+	return wiphy_locked_debugfs_read(sdata->local->hw.wiphy,
+					 file, buf, sizeof(buf),
+					 userbuf, count, ppos,
+					 ieee80211_if_read_sdata_handler,
+					 &data);
+}
 
-	if (ret >= 0)
-		ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret);
+struct ieee80211_if_write_sdata_data {
+	ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int);
+	struct ieee80211_sub_if_data *sdata;
+};
+
+static ssize_t ieee80211_if_write_sdata_handler(struct wiphy *wiphy,
+						struct file *file,
+						char *buf,
+						size_t count,
+						void *data)
+{
+	struct ieee80211_if_write_sdata_data *d = data;
 
-	return ret;
+	return d->write(d->sdata, buf, count);
 }
 
 static ssize_t ieee80211_if_write_sdata(
-	struct ieee80211_sub_if_data *sdata,
+	struct file *file,
 	const char __user *userbuf,
 	size_t count, loff_t *ppos,
 	ssize_t (*write)(struct ieee80211_sub_if_data *sdata, const char *, int))
 {
+	struct ieee80211_sub_if_data *sdata = file->private_data;
+	struct ieee80211_if_write_sdata_data data = {
+		.write = write,
+		.sdata = sdata,
+	};
 	char buf[64];
-	ssize_t ret;
 
-	if (count >= sizeof(buf))
-		return -E2BIG;
+	return wiphy_locked_debugfs_write(sdata->local->hw.wiphy,
+					  file, buf, sizeof(buf),
+					  userbuf, count,
+					  ieee80211_if_write_sdata_handler,
+					  &data);
+}
 
-	if (copy_from_user(buf, userbuf, count))
-		return -EFAULT;
-	buf[count] = '\0';
+struct ieee80211_if_read_link_data {
+	ssize_t (*format)(const struct ieee80211_link_data *, char *, int);
+	struct ieee80211_link_data *link;
+};
 
-	wiphy_lock(sdata->local->hw.wiphy);
-	ret = (*write)(sdata, buf, count);
-	wiphy_unlock(sdata->local->hw.wiphy);
+static ssize_t ieee80211_if_read_link_handler(struct wiphy *wiphy,
+					      struct file *file,
+					      char *buf,
+					      size_t bufsize,
+					      void *data)
+{
+	struct ieee80211_if_read_link_data *d = data;
 
-	return ret;
+	return d->format(d->link, buf, bufsize);
 }
 
 static ssize_t ieee80211_if_read_link(
-	struct ieee80211_link_data *link,
+	struct file *file,
 	char __user *userbuf,
 	size_t count, loff_t *ppos,
 	ssize_t (*format)(const struct ieee80211_link_data *link, char *, int))
 {
+	struct ieee80211_link_data *link = file->private_data;
+	struct ieee80211_if_read_link_data data = {
+		.format = format,
+		.link = link,
+	};
 	char buf[200];
-	ssize_t ret = -EINVAL;
 
-	wiphy_lock(link->sdata->local->hw.wiphy);
-	ret = (*format)(link, buf, sizeof(buf));
-	wiphy_unlock(link->sdata->local->hw.wiphy);
+	return wiphy_locked_debugfs_read(link->sdata->local->hw.wiphy,
+					 file, buf, sizeof(buf),
+					 userbuf, count, ppos,
+					 ieee80211_if_read_link_handler,
+					 &data);
+}
+
+struct ieee80211_if_write_link_data {
+	ssize_t (*write)(struct ieee80211_link_data *, const char *, int);
+	struct ieee80211_link_data *link;
+};
 
-	if (ret >= 0)
-		ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret);
+static ssize_t ieee80211_if_write_link_handler(struct wiphy *wiphy,
+					       struct file *file,
+					       char *buf,
+					       size_t count,
+					       void *data)
+{
+	struct ieee80211_if_write_sdata_data *d = data;
 
-	return ret;
+	return d->write(d->sdata, buf, count);
 }
 
 static ssize_t ieee80211_if_write_link(
-	struct ieee80211_link_data *link,
+	struct file *file,
 	const char __user *userbuf,
 	size_t count, loff_t *ppos,
 	ssize_t (*write)(struct ieee80211_link_data *link, const char *, int))
 {
+	struct ieee80211_link_data *link = file->private_data;
+	struct ieee80211_if_write_link_data data = {
+		.write = write,
+		.link = link,
+	};
 	char buf[64];
-	ssize_t ret;
-
-	if (count >= sizeof(buf))
-		return -E2BIG;
-
-	if (copy_from_user(buf, userbuf, count))
-		return -EFAULT;
-	buf[count] = '\0';
-
-	wiphy_lock(link->sdata->local->hw.wiphy);
-	ret = (*write)(link, buf, count);
-	wiphy_unlock(link->sdata->local->hw.wiphy);
 
-	return ret;
+	return wiphy_locked_debugfs_write(link->sdata->local->hw.wiphy,
+					  file, buf, sizeof(buf),
+					  userbuf, count,
+					  ieee80211_if_write_link_handler,
+					  &data);
 }
 
 #define IEEE80211_IF_FMT(name, type, field, format_string)		\
@@ -173,7 +233,7 @@ static ssize_t ieee80211_if_read_##name(struct file *file,		\
 					char __user *userbuf,		\
 					size_t count, loff_t *ppos)	\
 {									\
-	return ieee80211_if_read_sdata(file->private_data,		\
+	return ieee80211_if_read_sdata(file,				\
 				       userbuf, count, ppos,		\
 				       ieee80211_if_fmt_##name);	\
 }
@@ -183,7 +243,7 @@ static ssize_t ieee80211_if_write_##name(struct file *file,		\
 					 const char __user *userbuf,	\
 					 size_t count, loff_t *ppos)	\
 {									\
-	return ieee80211_if_write_sdata(file->private_data, userbuf,	\
+	return ieee80211_if_write_sdata(file, userbuf,			\
 					count, ppos,			\
 					ieee80211_if_parse_##name);	\
 }
@@ -211,7 +271,7 @@ static ssize_t ieee80211_if_read_##name(struct file *file,		\
 					char __user *userbuf,		\
 					size_t count, loff_t *ppos)	\
 {									\
-	return ieee80211_if_read_link(file->private_data,		\
+	return ieee80211_if_read_link(file,				\
 				      userbuf, count, ppos,		\
 				      ieee80211_if_fmt_##name);	\
 }
@@ -221,7 +281,7 @@ static ssize_t ieee80211_if_write_##name(struct file *file,		\
 					 const char __user *userbuf,	\
 					 size_t count, loff_t *ppos)	\
 {									\
-	return ieee80211_if_write_link(file->private_data, userbuf,	\
+	return ieee80211_if_write_link(file, userbuf,			\
 				       count, ppos,			\
 				       ieee80211_if_parse_##name);	\
 }
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 06e3613bf46b..5bf507ebb096 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -312,23 +312,14 @@ static ssize_t sta_aql_write(struct file *file, const char __user *userbuf,
 STA_OPS_RW(aql);
 
 
-static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
-					size_t count, loff_t *ppos)
+static ssize_t sta_agg_status_do_read(struct wiphy *wiphy, struct file *file,
+				      char *buf, size_t bufsz, void *data)
 {
-	char *buf, *p;
-	ssize_t bufsz = 71 + IEEE80211_NUM_TIDS * 40;
+	struct sta_info *sta = data;
+	char *p = buf;
 	int i;
-	struct sta_info *sta = file->private_data;
 	struct tid_ampdu_rx *tid_rx;
 	struct tid_ampdu_tx *tid_tx;
-	ssize_t ret;
-
-	buf = kzalloc(bufsz, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-	p = buf;
-
-	rcu_read_lock();
 
 	p += scnprintf(p, bufsz + buf - p, "next dialog_token: %#02x\n",
 			sta->ampdu_mlme.dialog_token_allocator + 1);
@@ -338,8 +329,8 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 	for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
 		bool tid_rx_valid;
 
-		tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]);
-		tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]);
+		tid_rx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_rx[i]);
+		tid_tx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_tx[i]);
 		tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid);
 
 		p += scnprintf(p, bufsz + buf - p, "%02d", i);
@@ -358,31 +349,39 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 				tid_tx ? skb_queue_len(&tid_tx->pending) : 0);
 		p += scnprintf(p, bufsz + buf - p, "\n");
 	}
-	rcu_read_unlock();
 
-	ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+	return p - buf;
+}
+
+static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
+				   size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	struct wiphy *wiphy = sta->local->hw.wiphy;
+	size_t bufsz = 71 + IEEE80211_NUM_TIDS * 40;
+	char *buf = kmalloc(bufsz, GFP_KERNEL);
+	ssize_t ret;
+
+	if (!buf)
+		return -ENOMEM;
+
+	ret = wiphy_locked_debugfs_read(wiphy, file, buf, bufsz,
+					userbuf, count, ppos,
+					sta_agg_status_do_read, sta);
 	kfree(buf);
+
 	return ret;
 }
 
-static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf,
-				    size_t count, loff_t *ppos)
+static ssize_t sta_agg_status_do_write(struct wiphy *wiphy, struct file *file,
+				       char *buf, size_t count, void *data)
 {
-	char _buf[25] = {}, *buf = _buf;
-	struct sta_info *sta = file->private_data;
+	struct sta_info *sta = data;
 	bool start, tx;
 	unsigned long tid;
-	char *pos;
+	char *pos = buf;
 	int ret, timeout = 5000;
 
-	if (count > sizeof(_buf))
-		return -EINVAL;
-
-	if (copy_from_user(buf, userbuf, count))
-		return -EFAULT;
-
-	buf[sizeof(_buf) - 1] = '\0';
-	pos = buf;
 	buf = strsep(&pos, " ");
 	if (!buf)
 		return -EINVAL;
@@ -420,7 +419,6 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu
 	if (ret || tid >= IEEE80211_NUM_TIDS)
 		return -EINVAL;
 
-	wiphy_lock(sta->local->hw.wiphy);
 	if (tx) {
 		if (start)
 			ret = ieee80211_start_tx_ba_session(&sta->sta, tid,
@@ -432,10 +430,22 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu
 					       3, true);
 		ret = 0;
 	}
-	wiphy_unlock(sta->local->hw.wiphy);
 
 	return ret ?: count;
 }
+
+static ssize_t sta_agg_status_write(struct file *file,
+				    const char __user *userbuf,
+				    size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	struct wiphy *wiphy = sta->local->hw.wiphy;
+	char _buf[26];
+
+	return wiphy_locked_debugfs_write(wiphy, file, _buf, sizeof(_buf),
+					  userbuf, count,
+					  sta_agg_status_do_write, sta);
+}
 STA_OPS_RW(agg_status);
 
 /* link sta attributes */
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 568633b38c47..f690c385a345 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -23,7 +23,7 @@
 static inline struct ieee80211_sub_if_data *
 get_bss_sdata(struct ieee80211_sub_if_data *sdata)
 {
-	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+	if (sdata && sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 		sdata = container_of(sdata->bss, struct ieee80211_sub_if_data,
 				     u.ap);
 
@@ -695,11 +695,14 @@ static inline void drv_flush(struct ieee80211_local *local,
 			     struct ieee80211_sub_if_data *sdata,
 			     u32 queues, bool drop)
 {
-	struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL;
+	struct ieee80211_vif *vif;
 
 	might_sleep();
 	lockdep_assert_wiphy(local->hw.wiphy);
 
+	sdata = get_bss_sdata(sdata);
+	vif = sdata ? &sdata->vif : NULL;
+
 	if (sdata && !check_sdata_in_driver(sdata))
 		return;
 
@@ -716,6 +719,8 @@ static inline void drv_flush_sta(struct ieee80211_local *local,
 	might_sleep();
 	lockdep_assert_wiphy(local->hw.wiphy);
 
+	sdata = get_bss_sdata(sdata);
+
 	if (sdata && !check_sdata_in_driver(sdata))
 		return;
 
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 68cea2685224..749f4ecab990 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -271,6 +271,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
 	case NL80211_CHAN_WIDTH_80:
 	case NL80211_CHAN_WIDTH_80P80:
 	case NL80211_CHAN_WIDTH_160:
+	case NL80211_CHAN_WIDTH_320:
 		bw = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
 				IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
 		break;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index cd15ec73073e..c53914012d01 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -108,6 +108,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 			mp_opt->suboptions |= OPTION_MPTCP_DSS;
 			mp_opt->use_map = 1;
 			mp_opt->mpc_map = 1;
+			mp_opt->use_ack = 0;
 			mp_opt->data_len = get_unaligned_be16(ptr);
 			ptr += 2;
 		}
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index e502ec00b2fe..0e4beae421f8 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -31,7 +31,7 @@ struct bpf_nf_link {
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 static const struct nf_defrag_hook *
 get_proto_defrag_hook(struct bpf_nf_link *link,
-		      const struct nf_defrag_hook __rcu *global_hook,
+		      const struct nf_defrag_hook __rcu **ptr_global_hook,
 		      const char *mod)
 {
 	const struct nf_defrag_hook *hook;
@@ -39,7 +39,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link,
 
 	/* RCU protects us from races against module unloading */
 	rcu_read_lock();
-	hook = rcu_dereference(global_hook);
+	hook = rcu_dereference(*ptr_global_hook);
 	if (!hook) {
 		rcu_read_unlock();
 		err = request_module(mod);
@@ -47,7 +47,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link,
 			return ERR_PTR(err < 0 ? err : -EINVAL);
 
 		rcu_read_lock();
-		hook = rcu_dereference(global_hook);
+		hook = rcu_dereference(*ptr_global_hook);
 	}
 
 	if (hook && try_module_get(hook->owner)) {
@@ -78,7 +78,7 @@ static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
 	switch (link->hook_ops.pf) {
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
 	case NFPROTO_IPV4:
-		hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4");
+		hook = get_proto_defrag_hook(link, &nf_defrag_v4_hook, "nf_defrag_ipv4");
 		if (IS_ERR(hook))
 			return PTR_ERR(hook);
 
@@ -87,7 +87,7 @@ static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
 #endif
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 	case NFPROTO_IPV6:
-		hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6");
+		hook = get_proto_defrag_hook(link, &nf_defrag_v6_hook, "nf_defrag_ipv6");
 		if (IS_ERR(hook))
 			return PTR_ERR(hook);
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c0a42989b982..c5c17c6e80ed 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -803,7 +803,7 @@ static struct nft_table *nft_table_lookup(const struct net *net,
 
 static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
 						   const struct nlattr *nla,
-						   u8 genmask, u32 nlpid)
+						   int family, u8 genmask, u32 nlpid)
 {
 	struct nftables_pernet *nft_net;
 	struct nft_table *table;
@@ -811,6 +811,7 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
 	nft_net = nft_pernet(net);
 	list_for_each_entry(table, &nft_net->tables, list) {
 		if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
+		    table->family == family &&
 		    nft_active_genmask(table, genmask)) {
 			if (nft_table_has_owner(table) &&
 			    nlpid && table->nlpid != nlpid)
@@ -1544,7 +1545,7 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
 
 	if (nla[NFTA_TABLE_HANDLE]) {
 		attr = nla[NFTA_TABLE_HANDLE];
-		table = nft_table_lookup_byhandle(net, attr, genmask,
+		table = nft_table_lookup_byhandle(net, attr, family, genmask,
 						  NETLINK_CB(skb).portid);
 	} else {
 		attr = nla[NFTA_TABLE_NAME];
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index b18a79039125..c09dba57354c 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -280,10 +280,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 			priv->expr_array[i] = dynset_expr;
 			priv->num_exprs++;
 
-			if (set->num_exprs &&
-			    dynset_expr->ops != set->exprs[i]->ops) {
-				err = -EOPNOTSUPP;
-				goto err_expr_free;
+			if (set->num_exprs) {
+				if (i >= set->num_exprs) {
+					err = -EINVAL;
+					goto err_expr_free;
+				}
+				if (dynset_expr->ops != set->exprs[i]->ops) {
+					err = -EOPNOTSUPP;
+					goto err_expr_free;
+				}
 			}
 			i++;
 		}
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 3fbaa7bf41f9..6eb571d0c3fd 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -214,7 +214,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
 
 		offset = i + priv->offset;
 		if (priv->flags & NFT_EXTHDR_F_PRESENT) {
-			*dest = 1;
+			nft_reg_store8(dest, 1);
 		} else {
 			if (priv->len % NFT_REG32_SIZE)
 				dest[priv->len / NFT_REG32_SIZE] = 0;
@@ -461,7 +461,7 @@ static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
 		type = bufp[0];
 
 		if (type == priv->type) {
-			*dest = 1;
+			nft_reg_store8(dest, 1);
 			return;
 		}
 
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 1bfe258018da..37cfe6dd712d 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -145,11 +145,15 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv,
 	switch (priv->result) {
 	case NFT_FIB_RESULT_OIF:
 		index = dev ? dev->ifindex : 0;
-		*dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index;
+		if (priv->flags & NFTA_FIB_F_PRESENT)
+			nft_reg_store8(dreg, !!index);
+		else
+			*dreg = index;
+
 		break;
 	case NFT_FIB_RESULT_OIFNAME:
 		if (priv->flags & NFTA_FIB_F_PRESENT)
-			*dreg = !!dev;
+			nft_reg_store8(dreg, !!dev);
 		else
 			strscpy_pad(reg, dev ? dev->name : "", IFNAMSIZ);
 		break;
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 701977af3ee8..7252fcdae349 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -2043,6 +2043,9 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
 
 		e = f->mt[r].e;
 
+		if (!nft_set_elem_active(&e->ext, iter->genmask))
+			goto cont;
+
 		iter->err = iter->fn(ctx, set, iter, &e->priv);
 		if (iter->err < 0)
 			goto out;
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index e85ce69924ae..50332888c8d2 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -76,18 +76,23 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		 */
 		return false;
 
-	filp = sk->sk_socket->file;
-	if (filp == NULL)
+	read_lock_bh(&sk->sk_callback_lock);
+	filp = sk->sk_socket ? sk->sk_socket->file : NULL;
+	if (filp == NULL) {
+		read_unlock_bh(&sk->sk_callback_lock);
 		return ((info->match ^ info->invert) &
 		       (XT_OWNER_UID | XT_OWNER_GID)) == 0;
+	}
 
 	if (info->match & XT_OWNER_UID) {
 		kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
 		kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
 		if ((uid_gte(filp->f_cred->fsuid, uid_min) &&
 		     uid_lte(filp->f_cred->fsuid, uid_max)) ^
-		    !(info->invert & XT_OWNER_UID))
+		    !(info->invert & XT_OWNER_UID)) {
+			read_unlock_bh(&sk->sk_callback_lock);
 			return false;
+		}
 	}
 
 	if (info->match & XT_OWNER_GID) {
@@ -112,10 +117,13 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			}
 		}
 
-		if (match ^ !(info->invert & XT_OWNER_GID))
+		if (match ^ !(info->invert & XT_OWNER_GID)) {
+			read_unlock_bh(&sk->sk_callback_lock);
 			return false;
+		}
 	}
 
+	read_unlock_bh(&sk->sk_callback_lock);
 	return true;
 }
 
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 92ef5ed2e7b0..9c7ffd10df2a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1691,6 +1691,9 @@ static int genl_bind(struct net *net, int group)
 		if ((grp->flags & GENL_UNS_ADMIN_PERM) &&
 		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
 			ret = -EPERM;
+		if (grp->cap_sys_admin &&
+		    !ns_capable(net->user_ns, CAP_SYS_ADMIN))
+			ret = -EPERM;
 
 		break;
 	}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a84e00b5904b..7adf48549a3b 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4300,7 +4300,7 @@ static void packet_mm_open(struct vm_area_struct *vma)
 	struct sock *sk = sock->sk;
 
 	if (sk)
-		atomic_inc(&pkt_sk(sk)->mapped);
+		atomic_long_inc(&pkt_sk(sk)->mapped);
 }
 
 static void packet_mm_close(struct vm_area_struct *vma)
@@ -4310,7 +4310,7 @@ static void packet_mm_close(struct vm_area_struct *vma)
 	struct sock *sk = sock->sk;
 
 	if (sk)
-		atomic_dec(&pkt_sk(sk)->mapped);
+		atomic_long_dec(&pkt_sk(sk)->mapped);
 }
 
 static const struct vm_operations_struct packet_mmap_ops = {
@@ -4405,7 +4405,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 
 	err = -EBUSY;
 	if (!closing) {
-		if (atomic_read(&po->mapped))
+		if (atomic_long_read(&po->mapped))
 			goto out;
 		if (packet_read_pending(rb))
 			goto out;
@@ -4508,7 +4508,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 
 	err = -EBUSY;
 	mutex_lock(&po->pg_vec_lock);
-	if (closing || atomic_read(&po->mapped) == 0) {
+	if (closing || atomic_long_read(&po->mapped) == 0) {
 		err = 0;
 		spin_lock_bh(&rb_queue->lock);
 		swap(rb->pg_vec, pg_vec);
@@ -4526,9 +4526,9 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 		po->prot_hook.func = (po->rx_ring.pg_vec) ?
 						tpacket_rcv : packet_rcv;
 		skb_queue_purge(rb_queue);
-		if (atomic_read(&po->mapped))
-			pr_err("packet_mmap: vma is busy: %d\n",
-			       atomic_read(&po->mapped));
+		if (atomic_long_read(&po->mapped))
+			pr_err("packet_mmap: vma is busy: %ld\n",
+			       atomic_long_read(&po->mapped));
 	}
 	mutex_unlock(&po->pg_vec_lock);
 
@@ -4606,7 +4606,7 @@ static int packet_mmap(struct file *file, struct socket *sock,
 		}
 	}
 
-	atomic_inc(&po->mapped);
+	atomic_long_inc(&po->mapped);
 	vma->vm_ops = &packet_mmap_ops;
 	err = 0;
 
diff --git a/net/packet/internal.h b/net/packet/internal.h
index d29c94c45159..d5d70712007a 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -122,7 +122,7 @@ struct packet_sock {
 	__be16			num;
 	struct packet_rollover	*rollover;
 	struct packet_mclist	*mclist;
-	atomic_t		mapped;
+	atomic_long_t		mapped;
 	enum tpacket_versions	tp_version;
 	unsigned int		tp_hdrlen;
 	unsigned int		tp_reserve;
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 81a794e36f53..c34e902855db 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -31,7 +31,8 @@ enum psample_nl_multicast_groups {
 
 static const struct genl_multicast_group psample_nl_mcgrps[] = {
 	[PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME },
-	[PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME },
+	[PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME,
+				      .flags = GENL_UNS_ADMIN_PERM },
 };
 
 static struct genl_family psample_nl_family __ro_after_init;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 2a1388841951..73eebddbbf41 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -723,7 +723,7 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc,
 	int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size);
 
 	smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx;
-	smc->conn.peer_token = clc->d0.token;
+	smc->conn.peer_token = ntohll(clc->d0.token);
 	/* msg header takes up space in the buffer */
 	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
 	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
@@ -1415,7 +1415,7 @@ static int smc_connect_ism(struct smc_sock *smc,
 		if (rc)
 			return rc;
 	}
-	ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid;
+	ini->ism_peer_gid[ini->ism_selected] = ntohll(aclc->d0.gid);
 
 	/* there is only one lgr role for SMC-D; use server lock */
 	mutex_lock(&smc_server_lgr_pending);
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 8deb46c28f1d..72f4d81a3f41 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -1004,6 +1004,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
 {
 	struct smc_connection *conn = &smc->conn;
 	struct smc_clc_first_contact_ext_v2x fce;
+	struct smcd_dev *smcd = conn->lgr->smcd;
 	struct smc_clc_msg_accept_confirm *clc;
 	struct smc_clc_fce_gid_ext gle;
 	struct smc_clc_msg_trail trl;
@@ -1021,17 +1022,15 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
 		memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
 		       sizeof(SMCD_EYECATCHER));
 		clc->hdr.typev1 = SMC_TYPE_D;
-		clc->d0.gid =
-			conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd);
-		clc->d0.token = conn->rmb_desc->token;
+		clc->d0.gid = htonll(smcd->ops->get_local_gid(smcd));
+		clc->d0.token = htonll(conn->rmb_desc->token);
 		clc->d0.dmbe_size = conn->rmbe_size_comp;
 		clc->d0.dmbe_idx = 0;
 		memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
 		if (version == SMC_V1) {
 			clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
 		} else {
-			clc_v2->d1.chid =
-				htons(smc_ism_get_chid(conn->lgr->smcd));
+			clc_v2->d1.chid = htons(smc_ism_get_chid(smcd));
 			if (eid && eid[0])
 				memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN);
 			len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index c5c8e7db775a..08155a96a02a 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -204,8 +204,8 @@ struct smcr_clc_msg_accept_confirm {	/* SMCR accept/confirm */
 } __packed;
 
 struct smcd_clc_msg_accept_confirm_common {	/* SMCD accept/confirm */
-	u64 gid;		/* Sender GID */
-	u64 token;		/* DMB token */
+	__be64 gid;		/* Sender GID */
+	__be64 token;		/* DMB token */
 	u8 dmbe_idx;		/* DMBE index */
 #if defined(__BIG_ENDIAN_BITFIELD)
 	u8 dmbe_size : 4,	/* buf size (compressed) */
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 316f76187962..e37b4d2e2acd 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -952,6 +952,8 @@ static int tls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg,
 		}
 
 		sk_msg_page_add(msg_pl, page, part, off);
+		msg_pl->sg.copybreak = 0;
+		msg_pl->sg.curr = msg_pl->sg.end;
 		sk_mem_charge(sk, part);
 		*copied += part;
 		try_to_copy -= part;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a357dc5f2404..ac1f2bc18fc9 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -213,8 +213,6 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 }
 #endif /* CONFIG_SECURITY_NETWORK */
 
-#define unix_peer(sk) (unix_sk(sk)->peer)
-
 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 {
 	return unix_peer(osk) == sk;
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index 2f9d8271c6ec..7ea7c3a0d0d0 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -159,12 +159,17 @@ int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool re
 
 int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
 {
+	struct sock *sk_pair;
+
 	if (restore) {
 		sk->sk_write_space = psock->saved_write_space;
 		sock_replace_proto(sk, psock->sk_proto);
 		return 0;
 	}
 
+	sk_pair = unix_peer(sk);
+	sock_hold(sk_pair);
+	psock->sk_pair = sk_pair;
 	unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
 	sock_replace_proto(sk, &unix_stream_bpf_prot);
 	return 0;
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index f6dc896bf44c..c8e162c9d1df 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -59,8 +59,7 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
 	t_ops = virtio_transport_get_ops(info->vsk);
 
 	if (t_ops->can_msgzerocopy) {
-		int pages_in_iov = iov_iter_npages(iov_iter, MAX_SKB_FRAGS);
-		int pages_to_send = min(pages_in_iov, MAX_SKB_FRAGS);
+		int pages_to_send = iov_iter_npages(iov_iter, MAX_SKB_FRAGS);
 
 		/* +1 is for packet header. */
 		return t_ops->can_msgzerocopy(pages_to_send + 1);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 758c9a2a12c0..409d74c57ca0 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -191,13 +191,13 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
 		return err;
 	}
 
+	wiphy_lock(&rdev->wiphy);
 	list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
 		if (!wdev->netdev)
 			continue;
 		nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
 	}
 
-	wiphy_lock(&rdev->wiphy);
 	nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY);
 
 	wiphy_net_set(&rdev->wiphy, net);
@@ -206,13 +206,13 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
 	WARN_ON(err);
 
 	nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
-	wiphy_unlock(&rdev->wiphy);
 
 	list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
 		if (!wdev->netdev)
 			continue;
 		nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
 	}
+	wiphy_unlock(&rdev->wiphy);
 
 	return 0;
 }
@@ -221,7 +221,9 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data)
 {
 	struct cfg80211_registered_device *rdev = data;
 
+	wiphy_lock(&rdev->wiphy);
 	rdev_rfkill_poll(rdev);
+	wiphy_unlock(&rdev->wiphy);
 }
 
 void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 4c692c7faf30..cb61d33d4f1e 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -293,6 +293,7 @@ struct cfg80211_cqm_config {
 	u32 rssi_hyst;
 	s32 last_rssi_event_value;
 	enum nl80211_cqm_rssi_threshold_event last_rssi_event_type;
+	bool use_range_api;
 	int n_rssi_thresholds;
 	s32 rssi_thresholds[] __counted_by(n_rssi_thresholds);
 };
diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
index 0878b162890a..40e49074e2ee 100644
--- a/net/wireless/debugfs.c
+++ b/net/wireless/debugfs.c
@@ -4,6 +4,7 @@
  *
  * Copyright 2009	Luis R. Rodriguez <lrodriguez@atheros.com>
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2023 Intel Corporation
  */
 
 #include <linux/slab.h>
@@ -109,3 +110,162 @@ void cfg80211_debugfs_rdev_add(struct cfg80211_registered_device *rdev)
 	DEBUGFS_ADD(long_retry_limit);
 	DEBUGFS_ADD(ht40allow_map);
 }
+
+struct debugfs_read_work {
+	struct wiphy_work work;
+	ssize_t (*handler)(struct wiphy *wiphy,
+			   struct file *file,
+			   char *buf,
+			   size_t count,
+			   void *data);
+	struct wiphy *wiphy;
+	struct file *file;
+	char *buf;
+	size_t bufsize;
+	void *data;
+	ssize_t ret;
+	struct completion completion;
+};
+
+static void wiphy_locked_debugfs_read_work(struct wiphy *wiphy,
+					   struct wiphy_work *work)
+{
+	struct debugfs_read_work *w = container_of(work, typeof(*w), work);
+
+	w->ret = w->handler(w->wiphy, w->file, w->buf, w->bufsize, w->data);
+	complete(&w->completion);
+}
+
+static void wiphy_locked_debugfs_read_cancel(struct dentry *dentry,
+					     void *data)
+{
+	struct debugfs_read_work *w = data;
+
+	wiphy_work_cancel(w->wiphy, &w->work);
+	complete(&w->completion);
+}
+
+ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file,
+				  char *buf, size_t bufsize,
+				  char __user *userbuf, size_t count,
+				  loff_t *ppos,
+				  ssize_t (*handler)(struct wiphy *wiphy,
+						     struct file *file,
+						     char *buf,
+						     size_t bufsize,
+						     void *data),
+				  void *data)
+{
+	struct debugfs_read_work work = {
+		.handler = handler,
+		.wiphy = wiphy,
+		.file = file,
+		.buf = buf,
+		.bufsize = bufsize,
+		.data = data,
+		.ret = -ENODEV,
+		.completion = COMPLETION_INITIALIZER_ONSTACK(work.completion),
+	};
+	struct debugfs_cancellation cancellation = {
+		.cancel = wiphy_locked_debugfs_read_cancel,
+		.cancel_data = &work,
+	};
+
+	/* don't leak stack data or whatever */
+	memset(buf, 0, bufsize);
+
+	wiphy_work_init(&work.work, wiphy_locked_debugfs_read_work);
+	wiphy_work_queue(wiphy, &work.work);
+
+	debugfs_enter_cancellation(file, &cancellation);
+	wait_for_completion(&work.completion);
+	debugfs_leave_cancellation(file, &cancellation);
+
+	if (work.ret < 0)
+		return work.ret;
+
+	if (WARN_ON(work.ret > bufsize))
+		return -EINVAL;
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, work.ret);
+}
+EXPORT_SYMBOL_GPL(wiphy_locked_debugfs_read);
+
+struct debugfs_write_work {
+	struct wiphy_work work;
+	ssize_t (*handler)(struct wiphy *wiphy,
+			   struct file *file,
+			   char *buf,
+			   size_t count,
+			   void *data);
+	struct wiphy *wiphy;
+	struct file *file;
+	char *buf;
+	size_t count;
+	void *data;
+	ssize_t ret;
+	struct completion completion;
+};
+
+static void wiphy_locked_debugfs_write_work(struct wiphy *wiphy,
+					    struct wiphy_work *work)
+{
+	struct debugfs_write_work *w = container_of(work, typeof(*w), work);
+
+	w->ret = w->handler(w->wiphy, w->file, w->buf, w->count, w->data);
+	complete(&w->completion);
+}
+
+static void wiphy_locked_debugfs_write_cancel(struct dentry *dentry,
+					      void *data)
+{
+	struct debugfs_write_work *w = data;
+
+	wiphy_work_cancel(w->wiphy, &w->work);
+	complete(&w->completion);
+}
+
+ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy,
+				   struct file *file, char *buf, size_t bufsize,
+				   const char __user *userbuf, size_t count,
+				   ssize_t (*handler)(struct wiphy *wiphy,
+						      struct file *file,
+						      char *buf,
+						      size_t count,
+						      void *data),
+				   void *data)
+{
+	struct debugfs_write_work work = {
+		.handler = handler,
+		.wiphy = wiphy,
+		.file = file,
+		.buf = buf,
+		.count = count,
+		.data = data,
+		.ret = -ENODEV,
+		.completion = COMPLETION_INITIALIZER_ONSTACK(work.completion),
+	};
+	struct debugfs_cancellation cancellation = {
+		.cancel = wiphy_locked_debugfs_write_cancel,
+		.cancel_data = &work,
+	};
+
+	/* mostly used for strings so enforce NUL-termination for safety */
+	if (count >= bufsize)
+		return -EINVAL;
+
+	memset(buf, 0, bufsize);
+
+	if (copy_from_user(buf, userbuf, count))
+		return -EFAULT;
+
+	wiphy_work_init(&work.work, wiphy_locked_debugfs_write_work);
+	wiphy_work_queue(wiphy, &work.work);
+
+	debugfs_enter_cancellation(file, &cancellation);
+	wait_for_completion(&work.completion);
+	debugfs_leave_cancellation(file, &cancellation);
+
+	return work.ret;
+}
+EXPORT_SYMBOL_GPL(wiphy_locked_debugfs_write);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 569234bc2be6..1cbbb11ea503 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3822,6 +3822,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
 	struct net_device *dev = wdev->netdev;
 	void *hdr;
 
+	lockdep_assert_wiphy(&rdev->wiphy);
+
 	WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE &&
 		cmd != NL80211_CMD_DEL_INTERFACE &&
 		cmd != NL80211_CMD_SET_INTERFACE);
@@ -3989,6 +3991,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
 
 		if_idx = 0;
 
+		wiphy_lock(&rdev->wiphy);
 		list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
 			if (if_idx < if_start) {
 				if_idx++;
@@ -3998,10 +4001,12 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
 					       cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					       rdev, wdev,
 					       NL80211_CMD_NEW_INTERFACE) < 0) {
+				wiphy_unlock(&rdev->wiphy);
 				goto out;
 			}
 			if_idx++;
 		}
+		wiphy_unlock(&rdev->wiphy);
 
 		wp_idx++;
 	}
@@ -12787,10 +12792,6 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
 	int i, n, low_index;
 	int err;
 
-	/* RSSI reporting disabled? */
-	if (!cqm_config)
-		return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0);
-
 	/*
 	 * Obtain current RSSI value if possible, if not and no RSSI threshold
 	 * event has been received yet, we should receive an event after a
@@ -12865,23 +12866,25 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
 	    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
 		return -EOPNOTSUPP;
 
-	if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) {
-		if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */
-			return rdev_set_cqm_rssi_config(rdev, dev, 0, 0);
-
-		return rdev_set_cqm_rssi_config(rdev, dev,
-						thresholds[0], hysteresis);
-	}
-
-	if (!wiphy_ext_feature_isset(&rdev->wiphy,
-				     NL80211_EXT_FEATURE_CQM_RSSI_LIST))
-		return -EOPNOTSUPP;
-
 	if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */
 		n_thresholds = 0;
 
 	old = wiphy_dereference(wdev->wiphy, wdev->cqm_config);
 
+	/* if already disabled just succeed */
+	if (!n_thresholds && !old)
+		return 0;
+
+	if (n_thresholds > 1) {
+		if (!wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_CQM_RSSI_LIST) ||
+		    !rdev->ops->set_cqm_rssi_range_config)
+			return -EOPNOTSUPP;
+	} else {
+		if (!rdev->ops->set_cqm_rssi_config)
+			return -EOPNOTSUPP;
+	}
+
 	if (n_thresholds) {
 		cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds,
 						 n_thresholds),
@@ -12894,13 +12897,26 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
 		memcpy(cqm_config->rssi_thresholds, thresholds,
 		       flex_array_size(cqm_config, rssi_thresholds,
 				       n_thresholds));
+		cqm_config->use_range_api = n_thresholds > 1 ||
+					    !rdev->ops->set_cqm_rssi_config;
 
 		rcu_assign_pointer(wdev->cqm_config, cqm_config);
+
+		if (cqm_config->use_range_api)
+			err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config);
+		else
+			err = rdev_set_cqm_rssi_config(rdev, dev,
+						       thresholds[0],
+						       hysteresis);
 	} else {
 		RCU_INIT_POINTER(wdev->cqm_config, NULL);
+		/* if enabled as range also disable via range */
+		if (old->use_range_api)
+			err = rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0);
+		else
+			err = rdev_set_cqm_rssi_config(rdev, dev, 0, 0);
 	}
 
-	err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config);
 	if (err) {
 		rcu_assign_pointer(wdev->cqm_config, old);
 		kfree_rcu(cqm_config, rcu_head);
@@ -19009,10 +19025,11 @@ void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work)
 	s32 rssi_level;
 
 	cqm_config = wiphy_dereference(wdev->wiphy, wdev->cqm_config);
-	if (!wdev->cqm_config)
+	if (!cqm_config)
 		return;
 
-	cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config);
+	if (cqm_config->use_range_api)
+		cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config);
 
 	rssi_level = cqm_config->last_rssi_event_value;
 	rssi_event = cqm_config->last_rssi_event_type;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ae9f8cb611f6..3da0b52f308d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -947,7 +947,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
 
 	rcu_read_lock();
 	if (xsk_check_common(xs))
-		goto skip_tx;
+		goto out;
 
 	pool = xs->pool;
 
@@ -959,12 +959,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
 			xsk_generic_xmit(sk);
 	}
 
-skip_tx:
 	if (xs->rx && !xskq_prod_is_empty(xs->rx))
 		mask |= EPOLLIN | EPOLLRDNORM;
 	if (xs->tx && xsk_tx_writeable(xs))
 		mask |= EPOLLOUT | EPOLLWRNORM;
-
+out:
 	rcu_read_unlock();
 	return mask;
 }
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index d83ba5d8f3f4..f27d552aec43 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -138,15 +138,11 @@ $total_size = 0;
 while (my $line = <STDIN>) {
 	if ($line =~ m/$funcre/) {
 		$func = $1;
-		next if $line !~ m/^($xs*)/;
+		next if $line !~ m/^($x*)/;
 		if ($total_size > $min_stack) {
 			push @stack, "$intro$total_size\n";
 		}
-
-		$addr = $1;
-		$addr =~ s/ /0/g;
-		$addr = "0x$addr";
-
+		$addr = "0x$1";
 		$intro = "$addr $func [$file]:";
 		my $padlen = 56 - length($intro);
 		while ($padlen > 0) {
diff --git a/scripts/dtc/dt-extract-compatibles b/scripts/dtc/dt-extract-compatibles
index bd07477dd144..5ffb2364409b 100755
--- a/scripts/dtc/dt-extract-compatibles
+++ b/scripts/dtc/dt-extract-compatibles
@@ -1,8 +1,8 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0-only
 
+import fnmatch
 import os
-import glob
 import re
 import argparse
 
@@ -81,10 +81,20 @@ def print_compat(filename, compatibles):
 	else:
 		print(*compatibles, sep='\n')
 
+def glob_without_symlinks(root, glob):
+	for path, dirs, files in os.walk(root):
+		# Ignore hidden directories
+		for d in dirs:
+			if fnmatch.fnmatch(d, ".*"):
+				dirs.remove(d)
+		for f in files:
+			if fnmatch.fnmatch(f, glob):
+				yield os.path.join(path, f)
+
 def files_to_parse(path_args):
 	for f in path_args:
 		if os.path.isdir(f):
-			for filename in glob.iglob(f + "/**/*.c", recursive=True):
+			for filename in glob_without_symlinks(f, "*.c"):
 				yield filename
 		else:
 			yield f
diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c
index 910bd21d08f4..746ff2d272f2 100644
--- a/scripts/gcc-plugins/randomize_layout_plugin.c
+++ b/scripts/gcc-plugins/randomize_layout_plugin.c
@@ -339,8 +339,7 @@ static int relayout_struct(tree type)
 
 	/*
 	 * enforce that we don't randomize the layout of the last
-	 * element of a struct if it's a 0 or 1-length array
-	 * or a proper flexible array
+	 * element of a struct if it's a proper flexible array
 	 */
 	if (is_flexible_array(newtree[num_fields - 1])) {
 		has_flexarray = true;
diff --git a/scripts/gdb/linux/device.py b/scripts/gdb/linux/device.py
index 16376c5cfec6..0eabc5f4f8ca 100644
--- a/scripts/gdb/linux/device.py
+++ b/scripts/gdb/linux/device.py
@@ -36,26 +36,26 @@ def for_each_bus():
     for kobj in kset_for_each_object(gdb.parse_and_eval('bus_kset')):
         subsys = container_of(kobj, kset_type.get_type().pointer(), 'kobj')
         subsys_priv = container_of(subsys, subsys_private_type.get_type().pointer(), 'subsys')
-        yield subsys_priv['bus']
+        yield subsys_priv
 
 
 def for_each_class():
     for kobj in kset_for_each_object(gdb.parse_and_eval('class_kset')):
         subsys = container_of(kobj, kset_type.get_type().pointer(), 'kobj')
         subsys_priv = container_of(subsys, subsys_private_type.get_type().pointer(), 'subsys')
-        yield subsys_priv['class']
+        yield subsys_priv
 
 
 def get_bus_by_name(name):
     for item in for_each_bus():
-        if item['name'].string() == name:
+        if item['bus']['name'].string() == name:
             return item
     raise gdb.GdbError("Can't find bus type {!r}".format(name))
 
 
 def get_class_by_name(name):
     for item in for_each_class():
-        if item['name'].string() == name:
+        if item['class']['name'].string() == name:
             return item
     raise gdb.GdbError("Can't find device class {!r}".format(name))
 
@@ -70,13 +70,13 @@ def klist_for_each(klist):
 
 
 def bus_for_each_device(bus):
-    for kn in klist_for_each(bus['p']['klist_devices']):
+    for kn in klist_for_each(bus['klist_devices']):
         dp = container_of(kn, device_private_type.get_type().pointer(), 'knode_bus')
         yield dp['device']
 
 
 def class_for_each_device(cls):
-    for kn in klist_for_each(cls['p']['klist_devices']):
+    for kn in klist_for_each(cls['klist_devices']):
         dp = container_of(kn, device_private_type.get_type().pointer(), 'knode_class')
         yield dp['device']
 
@@ -103,7 +103,7 @@ class LxDeviceListBus(gdb.Command):
     def invoke(self, arg, from_tty):
         if not arg:
             for bus in for_each_bus():
-                gdb.write('bus {}:\t{}\n'.format(bus['name'].string(), bus))
+                gdb.write('bus {}:\t{}\n'.format(bus['bus']['name'].string(), bus))
                 for dev in bus_for_each_device(bus):
                     _show_device(dev, level=1)
         else:
@@ -123,7 +123,7 @@ class LxDeviceListClass(gdb.Command):
     def invoke(self, arg, from_tty):
         if not arg:
             for cls in for_each_class():
-                gdb.write("class {}:\t{}\n".format(cls['name'].string(), cls))
+                gdb.write("class {}:\t{}\n".format(cls['class']['name'].string(), cls))
                 for dev in class_for_each_device(cls):
                     _show_device(dev, level=1)
         else:
diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py
index 17ec19e9b5bf..aa5ab6251f76 100644
--- a/scripts/gdb/linux/tasks.py
+++ b/scripts/gdb/linux/tasks.py
@@ -13,7 +13,7 @@
 
 import gdb
 
-from linux import utils
+from linux import utils, lists
 
 
 task_type = utils.CachedType("struct task_struct")
@@ -22,19 +22,15 @@ task_type = utils.CachedType("struct task_struct")
 def task_lists():
     task_ptr_type = task_type.get_type().pointer()
     init_task = gdb.parse_and_eval("init_task").address
-    t = g = init_task
+    t = init_task
 
     while True:
-        while True:
-            yield t
+        thread_head = t['signal']['thread_head']
+        for thread in lists.list_for_each_entry(thread_head, task_ptr_type, 'thread_node'):
+            yield thread
 
-            t = utils.container_of(t['thread_group']['next'],
-                                   task_ptr_type, "thread_group")
-            if t == g:
-                break
-
-        t = g = utils.container_of(g['tasks']['next'],
-                                   task_ptr_type, "tasks")
+        t = utils.container_of(t['tasks']['next'],
+                               task_ptr_type, "tasks")
         if t == init_task:
             return
 
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index 20bb2d7c8d4b..6d0c9c37796c 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -253,6 +253,7 @@ static const char * const snd_pcm_state_names[] = {
 	STATE(DRAINING),
 	STATE(PAUSED),
 	STATE(SUSPENDED),
+	STATE(DISCONNECTED),
 };
 
 static const char * const snd_pcm_access_names[] = {
diff --git a/sound/drivers/pcmtest.c b/sound/drivers/pcmtest.c
index b59b78a09224..b8bff5522bce 100644
--- a/sound/drivers/pcmtest.c
+++ b/sound/drivers/pcmtest.c
@@ -397,7 +397,6 @@ static int snd_pcmtst_pcm_close(struct snd_pcm_substream *substream)
 	struct pcmtst_buf_iter *v_iter = substream->runtime->private_data;
 
 	timer_shutdown_sync(&v_iter->timer_instance);
-	v_iter->substream = NULL;
 	playback_capture_test = !v_iter->is_buf_corrupted;
 	kfree(v_iter);
 	return 0;
@@ -435,6 +434,7 @@ static int snd_pcmtst_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
 		// We can't call timer_shutdown_sync here, as it is forbidden to sleep here
 		v_iter->suspend = true;
+		timer_delete(&v_iter->timer_instance);
 		break;
 	}
 
@@ -512,12 +512,22 @@ static int snd_pcmtst_ioctl(struct snd_pcm_substream *substream, unsigned int cm
 	return snd_pcm_lib_ioctl(substream, cmd, arg);
 }
 
+static int snd_pcmtst_sync_stop(struct snd_pcm_substream *substream)
+{
+	struct pcmtst_buf_iter *v_iter = substream->runtime->private_data;
+
+	timer_delete_sync(&v_iter->timer_instance);
+
+	return 0;
+}
+
 static const struct snd_pcm_ops snd_pcmtst_playback_ops = {
 	.open =		snd_pcmtst_pcm_open,
 	.close =	snd_pcmtst_pcm_close,
 	.trigger =	snd_pcmtst_pcm_trigger,
 	.hw_params =	snd_pcmtst_pcm_hw_params,
 	.ioctl =	snd_pcmtst_ioctl,
+	.sync_stop =	snd_pcmtst_sync_stop,
 	.hw_free =	snd_pcmtst_pcm_hw_free,
 	.prepare =	snd_pcmtst_pcm_prepare,
 	.pointer =	snd_pcmtst_pcm_pointer,
@@ -530,6 +540,7 @@ static const struct snd_pcm_ops snd_pcmtst_capture_ops = {
 	.hw_params =	snd_pcmtst_pcm_hw_params,
 	.hw_free =	snd_pcmtst_pcm_hw_free,
 	.ioctl =	snd_pcmtst_ioctl,
+	.sync_stop =	snd_pcmtst_sync_stop,
 	.prepare =	snd_pcmtst_pcm_prepare,
 	.pointer =	snd_pcmtst_pcm_pointer,
 };
diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c
index 2c4dfc0b7e34..696a958d93e9 100644
--- a/sound/hda/intel-nhlt.c
+++ b/sound/hda/intel-nhlt.c
@@ -238,7 +238,7 @@ EXPORT_SYMBOL(intel_nhlt_ssp_mclk_mask);
 
 static struct nhlt_specific_cfg *
 nhlt_get_specific_cfg(struct device *dev, struct nhlt_fmt *fmt, u8 num_ch,
-		      u32 rate, u8 vbps, u8 bps)
+		      u32 rate, u8 vbps, u8 bps, bool ignore_vbps)
 {
 	struct nhlt_fmt_cfg *cfg = fmt->fmt_config;
 	struct wav_fmt *wfmt;
@@ -255,8 +255,12 @@ nhlt_get_specific_cfg(struct device *dev, struct nhlt_fmt *fmt, u8 num_ch,
 		dev_dbg(dev, "Endpoint format: ch=%d fmt=%d/%d rate=%d\n",
 			wfmt->channels, _vbps, _bps, wfmt->samples_per_sec);
 
+		/*
+		 * When looking for exact match of configuration ignore the vbps
+		 * from NHLT table when ignore_vbps is true
+		 */
 		if (wfmt->channels == num_ch && wfmt->samples_per_sec == rate &&
-		    vbps == _vbps && bps == _bps)
+		    (ignore_vbps || vbps == _vbps) && bps == _bps)
 			return &cfg->config;
 
 		cfg = (struct nhlt_fmt_cfg *)(cfg->config.caps + cfg->config.size);
@@ -289,6 +293,7 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt,
 {
 	struct nhlt_specific_cfg *cfg;
 	struct nhlt_endpoint *epnt;
+	bool ignore_vbps = false;
 	struct nhlt_fmt *fmt;
 	int i;
 
@@ -298,7 +303,26 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt,
 	dev_dbg(dev, "Looking for configuration:\n");
 	dev_dbg(dev, "  vbus_id=%d link_type=%d dir=%d, dev_type=%d\n",
 		bus_id, link_type, dir, dev_type);
-	dev_dbg(dev, "  ch=%d fmt=%d/%d rate=%d\n", num_ch, vbps, bps, rate);
+	if (link_type == NHLT_LINK_DMIC && bps == 32 && (vbps == 24 || vbps == 32)) {
+		/*
+		 * The DMIC hardware supports only one type of 32 bits sample
+		 * size, which is 24 bit sampling on the MSB side and bits[1:0]
+		 * are used for indicating the channel number.
+		 * It has been observed that some NHLT tables have the vbps
+		 * specified as 32 while some uses 24.
+		 * The format these variations describe are identical, the
+		 * hardware is configured and behaves the same way.
+		 * Note: when the samples assumed to be vbps=32 then the 'noise'
+		 * introduced by the lower two bits (channel number) have no
+		 * real life implication on audio quality.
+		 */
+		dev_dbg(dev,
+			"  ch=%d fmt=%d rate=%d (vbps is ignored for DMIC 32bit format)\n",
+			num_ch, bps, rate);
+		ignore_vbps = true;
+	} else {
+		dev_dbg(dev, "  ch=%d fmt=%d/%d rate=%d\n", num_ch, vbps, bps, rate);
+	}
 	dev_dbg(dev, "Endpoint count=%d\n", nhlt->endpoint_count);
 
 	epnt = (struct nhlt_endpoint *)nhlt->desc;
@@ -307,7 +331,8 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt,
 		if (nhlt_check_ep_match(dev, epnt, bus_id, link_type, dir, dev_type)) {
 			fmt = (struct nhlt_fmt *)(epnt->config.caps + epnt->config.size);
 
-			cfg = nhlt_get_specific_cfg(dev, fmt, num_ch, rate, vbps, bps);
+			cfg = nhlt_get_specific_cfg(dev, fmt, num_ch, rate,
+						    vbps, bps, ignore_vbps);
 			if (cfg)
 				return cfg;
 		}
diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c
index b2db8091f0ed..cbd7d8badf91 100644
--- a/sound/pci/hda/cs35l41_hda.c
+++ b/sound/pci/hda/cs35l41_hda.c
@@ -504,7 +504,6 @@ static void cs35l41_shutdown_dsp(struct cs35l41_hda *cs35l41)
 
 	cs_dsp_stop(dsp);
 	cs_dsp_power_down(dsp);
-	cs35l41->firmware_running = false;
 	dev_dbg(cs35l41->dev, "Unloaded Firmware\n");
 }
 
@@ -550,7 +549,7 @@ static void cs35l41_hda_play_start(struct device *dev)
 
 	cs35l41->playback_started = true;
 
-	if (cs35l41->firmware_running) {
+	if (cs35l41->cs_dsp.running) {
 		regmap_multi_reg_write(reg, cs35l41_hda_config_dsp,
 				       ARRAY_SIZE(cs35l41_hda_config_dsp));
 		regmap_update_bits(reg, CS35L41_PWR_CTRL2,
@@ -580,7 +579,7 @@ static void cs35l41_mute(struct device *dev, bool mute)
 			regmap_multi_reg_write(reg, cs35l41_hda_mute, ARRAY_SIZE(cs35l41_hda_mute));
 		} else {
 			dev_dbg(dev, "Unmuting\n");
-			if (cs35l41->firmware_running) {
+			if (cs35l41->cs_dsp.running) {
 				regmap_multi_reg_write(reg, cs35l41_hda_unmute_dsp,
 						ARRAY_SIZE(cs35l41_hda_unmute_dsp));
 			} else {
@@ -599,7 +598,7 @@ static void cs35l41_hda_play_done(struct device *dev)
 	dev_dbg(dev, "Play (Complete)\n");
 
 	cs35l41_global_enable(dev, reg, cs35l41->hw_cfg.bst_type, 1,
-			      cs35l41->firmware_running);
+			      &cs35l41->cs_dsp);
 	cs35l41_mute(dev, false);
 }
 
@@ -612,7 +611,7 @@ static void cs35l41_hda_pause_start(struct device *dev)
 
 	cs35l41_mute(dev, true);
 	cs35l41_global_enable(dev, reg, cs35l41->hw_cfg.bst_type, 0,
-			      cs35l41->firmware_running);
+			      &cs35l41->cs_dsp);
 }
 
 static void cs35l41_hda_pause_done(struct device *dev)
@@ -625,7 +624,7 @@ static void cs35l41_hda_pause_done(struct device *dev)
 	regmap_update_bits(reg, CS35L41_PWR_CTRL2, CS35L41_AMP_EN_MASK, 0 << CS35L41_AMP_EN_SHIFT);
 	if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST)
 		regmap_write(reg, CS35L41_GPIO1_CTRL1, 0x00000001);
-	if (cs35l41->firmware_running) {
+	if (cs35l41->cs_dsp.running) {
 		cs35l41_set_cspl_mbox_cmd(dev, reg, CSPL_MBOX_CMD_PAUSE);
 		regmap_update_bits(reg, CS35L41_PWR_CTRL2,
 				   CS35L41_VMON_EN_MASK | CS35L41_IMON_EN_MASK,
@@ -675,7 +674,7 @@ static void cs35l41_hda_playback_hook(struct device *dev, int action)
 		break;
 	case HDA_GEN_PCM_ACT_CLOSE:
 		mutex_lock(&cs35l41->fw_mutex);
-		if (!cs35l41->firmware_running && cs35l41->request_fw_load &&
+		if (!cs35l41->cs_dsp.running && cs35l41->request_fw_load &&
 		    !cs35l41->fw_request_ongoing) {
 			dev_info(dev, "Requesting Firmware Load after HDA_GEN_PCM_ACT_CLOSE\n");
 			cs35l41->fw_request_ongoing = true;
@@ -761,10 +760,9 @@ static int cs35l41_verify_id(struct cs35l41_hda *cs35l41, unsigned int *regid, u
 static int cs35l41_ready_for_reset(struct cs35l41_hda *cs35l41)
 {
 	mutex_lock(&cs35l41->fw_mutex);
-	if (cs35l41->firmware_running) {
+	if (cs35l41->cs_dsp.running) {
 		cs35l41->cs_dsp.running = false;
 		cs35l41->cs_dsp.booted = false;
-		cs35l41->firmware_running = false;
 	}
 	regcache_mark_dirty(cs35l41->regmap);
 	mutex_unlock(&cs35l41->fw_mutex);
@@ -925,7 +923,7 @@ static int cs35l41_runtime_suspend(struct device *dev)
 
 	mutex_lock(&cs35l41->fw_mutex);
 
-	if (cs35l41->firmware_running) {
+	if (cs35l41->cs_dsp.running) {
 		ret = cs35l41_enter_hibernate(cs35l41->dev, cs35l41->regmap,
 					      cs35l41->hw_cfg.bst_type);
 		if (ret)
@@ -960,7 +958,7 @@ static int cs35l41_runtime_resume(struct device *dev)
 
 	regcache_cache_only(cs35l41->regmap, false);
 
-	if (cs35l41->firmware_running)	{
+	if (cs35l41->cs_dsp.running)	{
 		ret = cs35l41_exit_hibernate(cs35l41->dev, cs35l41->regmap);
 		if (ret) {
 			dev_warn(cs35l41->dev, "Unable to exit Hibernate.");
@@ -1052,8 +1050,6 @@ static int cs35l41_smart_amp(struct cs35l41_hda *cs35l41)
 		goto clean_dsp;
 	}
 
-	cs35l41->firmware_running = true;
-
 	return 0;
 
 clean_dsp:
@@ -1063,10 +1059,10 @@ clean_dsp:
 
 static void cs35l41_load_firmware(struct cs35l41_hda *cs35l41, bool load)
 {
-	if (cs35l41->firmware_running && !load) {
+	if (cs35l41->cs_dsp.running && !load) {
 		dev_dbg(cs35l41->dev, "Unloading Firmware\n");
 		cs35l41_shutdown_dsp(cs35l41);
-	} else if (!cs35l41->firmware_running && load) {
+	} else if (!cs35l41->cs_dsp.running && load) {
 		dev_dbg(cs35l41->dev, "Loading Firmware\n");
 		cs35l41_smart_amp(cs35l41);
 	} else {
@@ -1346,7 +1342,7 @@ static int cs35l41_hda_bind(struct device *dev, struct device *master, void *mas
 		 cs35l41->acpi_subsystem_id, cs35l41->hw_cfg.bst_type,
 		 cs35l41->hw_cfg.gpio1.func == CS35l41_VSPK_SWITCH,
 		 cs35l41->hw_cfg.spk_pos ? 'R' : 'L',
-		 cs35l41->firmware_running, cs35l41->speaker_id);
+		 cs35l41->cs_dsp.running, cs35l41->speaker_id);
 
 	return ret;
 }
diff --git a/sound/pci/hda/cs35l56_hda_spi.c b/sound/pci/hda/cs35l56_hda_spi.c
index 756aec342eab..27d7fbc56b4c 100644
--- a/sound/pci/hda/cs35l56_hda_spi.c
+++ b/sound/pci/hda/cs35l56_hda_spi.c
@@ -21,6 +21,10 @@ static int cs35l56_hda_spi_probe(struct spi_device *spi)
 		return -ENOMEM;
 
 	cs35l56->base.dev = &spi->dev;
+
+#ifdef CS35L56_WAKE_HOLD_TIME_US
+	cs35l56->base.can_hibernate = true;
+#endif
 	cs35l56->base.regmap = devm_regmap_init_spi(spi, &cs35l56_regmap_spi);
 	if (IS_ERR(cs35l56->base.regmap)) {
 		ret = PTR_ERR(cs35l56->base.regmap);
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index db90feb49c16..2d1df3654424 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2242,6 +2242,8 @@ static const struct snd_pci_quirk power_save_denylist[] = {
 	SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0),
 	/* https://bugs.launchpad.net/bugs/1821663 */
 	SND_PCI_QUIRK(0x1631, 0xe017, "Packard Bell NEC IMEDIA 5204", 0),
+	/* KONTRON SinglePC may cause a stall at runtime resume */
+	SND_PCI_QUIRK(0x1734, 0x1232, "KONTRON SinglePC", 0),
 	{}
 };
 #endif /* CONFIG_PM */
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 5618b1d9bfd1..0377912e9264 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1987,6 +1987,7 @@ enum {
 	ALC887_FIXUP_ASUS_AUDIO,
 	ALC887_FIXUP_ASUS_HMIC,
 	ALCS1200A_FIXUP_MIC_VREF,
+	ALC888VD_FIXUP_MIC_100VREF,
 };
 
 static void alc889_fixup_coef(struct hda_codec *codec,
@@ -2540,6 +2541,13 @@ static const struct hda_fixup alc882_fixups[] = {
 			{}
 		}
 	},
+	[ALC888VD_FIXUP_MIC_100VREF] = {
+		.type = HDA_FIXUP_PINCTLS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x18, PIN_VREF100 }, /* headset mic */
+			{}
+		}
+	},
 };
 
 static const struct snd_pci_quirk alc882_fixup_tbl[] = {
@@ -2609,6 +2617,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_MBA11_VREF),
 
 	SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD),
+	SND_PCI_QUIRK(0x10ec, 0x12d8, "iBase Elo Touch", ALC888VD_FIXUP_MIC_100VREF),
 	SND_PCI_QUIRK(0x13fe, 0x1009, "Advantech MIT-W101", ALC886_FIXUP_EAPD),
 	SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
@@ -3256,6 +3265,7 @@ static void alc_disable_headset_jack_key(struct hda_codec *codec)
 	case 0x10ec0230:
 	case 0x10ec0236:
 	case 0x10ec0256:
+	case 0x10ec0257:
 	case 0x19e58326:
 		alc_write_coef_idx(codec, 0x48, 0x0);
 		alc_update_coef_idx(codec, 0x49, 0x0045, 0x0);
@@ -3285,6 +3295,7 @@ static void alc_enable_headset_jack_key(struct hda_codec *codec)
 	case 0x10ec0230:
 	case 0x10ec0236:
 	case 0x10ec0256:
+	case 0x10ec0257:
 	case 0x19e58326:
 		alc_write_coef_idx(codec, 0x48, 0xd011);
 		alc_update_coef_idx(codec, 0x49, 0x007f, 0x0045);
@@ -6496,6 +6507,7 @@ static void alc_combo_jack_hp_jd_restart(struct hda_codec *codec)
 	case 0x10ec0236:
 	case 0x10ec0255:
 	case 0x10ec0256:
+	case 0x10ec0257:
 	case 0x19e58326:
 		alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */
 		alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15);
@@ -7249,6 +7261,7 @@ enum {
 	ALC290_FIXUP_SUBWOOFER_HSJACK,
 	ALC269_FIXUP_THINKPAD_ACPI,
 	ALC269_FIXUP_DMIC_THINKPAD_ACPI,
+	ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO,
 	ALC255_FIXUP_ACER_MIC_NO_PRESENCE,
 	ALC255_FIXUP_ASUS_MIC_NO_PRESENCE,
 	ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
@@ -7601,6 +7614,14 @@ static const struct hda_fixup alc269_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc269_fixup_pincfg_U7x7_headset_mic,
 	},
+	[ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x18, 0x03a19020 }, /* headset mic */
+			{ 0x1b, 0x90170150 }, /* speaker */
+			{ }
+		},
+	},
 	[ALC269_FIXUP_AMIC] = {
 		.type = HDA_FIXUP_PINS,
 		.v.pins = (const struct hda_pintbl[]) {
@@ -9684,6 +9705,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK),
 	SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS),
 	SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS),
+	SND_PCI_QUIRK(0x1028, 0x0beb, "Dell XPS 15 9530 (2023)", ALC289_FIXUP_DELL_CS35L41_SPI_2),
 	SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS),
 	SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS),
@@ -9942,6 +9964,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1043, 0x17f3, "ROG Ally RC71L_RC71L", ALC294_FIXUP_ASUS_ALLY),
 	SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS),
 	SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
+	SND_PCI_QUIRK(0x1043, 0x18d3, "ASUS UM3504DA", ALC294_FIXUP_CS35L41_I2C_2),
 	SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1043, 0x194e, "ASUS UX563FD", ALC294_FIXUP_ASUS_HPE),
 	SND_PCI_QUIRK(0x1043, 0x1970, "ASUS UX550VE", ALC289_FIXUP_ASUS_GA401),
@@ -10183,6 +10206,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C),
 	SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C),
 	SND_PCI_QUIRK(0x17aa, 0x3881, "YB9 dual power mode2 YC", ALC287_FIXUP_TAS2781_I2C),
+	SND_PCI_QUIRK(0x17aa, 0x3882, "Lenovo Yoga Pro 7 14APH8", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
 	SND_PCI_QUIRK(0x17aa, 0x3884, "Y780 YG DUAL", ALC287_FIXUP_TAS2781_I2C),
 	SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C),
 	SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C),
@@ -10244,11 +10268,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
+	SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO),
 	SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
 	SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
 	SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
 	SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK),
 	SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0xf111, 0x0005, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
 
 #if 0
 	/* Below is a quirk table taken from the old code.
@@ -12174,6 +12201,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x32f7, "Lenovo ThinkCentre M90", ALC897_FIXUP_HEADSET_MIC_PIN),
 	SND_PCI_QUIRK(0x17aa, 0x3321, "Lenovo ThinkCentre M70 Gen4", ALC897_FIXUP_HEADSET_MIC_PIN),
 	SND_PCI_QUIRK(0x17aa, 0x331b, "Lenovo ThinkCentre M90 Gen4", ALC897_FIXUP_HEADSET_MIC_PIN),
+	SND_PCI_QUIRK(0x17aa, 0x3364, "Lenovo ThinkCentre M90 Gen5", ALC897_FIXUP_HEADSET_MIC_PIN),
 	SND_PCI_QUIRK(0x17aa, 0x3742, "Lenovo TianYi510Pro-14IOB", ALC897_FIXUP_HEADSET_MIC_PIN2),
 	SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD),
 	SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD),
diff --git a/sound/soc/amd/acp-config.c b/sound/soc/amd/acp-config.c
index 20cee7104c2b..3bc4b2e41650 100644
--- a/sound/soc/amd/acp-config.c
+++ b/sound/soc/amd/acp-config.c
@@ -111,6 +111,20 @@ static const struct config_entry config_table[] = {
 				.matches = {
 					DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "HUAWEI"),
 					DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HVY-WXX9"),
+					DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "M1010"),
+				},
+			},
+			{}
+		},
+	},
+	{
+		.flags = FLAG_AMD_LEGACY,
+		.device = ACP_PCI_DEV_ID,
+		.dmi_table = (const struct dmi_system_id []) {
+			{
+				.matches = {
+					DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "HUAWEI"),
+					DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HVY-WXX9"),
 					DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "M1020"),
 				},
 			},
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index 15a864dcd7bd..d83cb6e4c62a 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -286,6 +286,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
 	{
 		.driver_data = &acp6x_card,
 		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "E1504FA"),
+		}
+	},
+	{
+		.driver_data = &acp6x_card,
+		.matches = {
 			DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 B7ED"),
 		}
@@ -370,6 +377,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
 	{
 		.driver_data = &acp6x_card,
 		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+			DMI_MATCH(DMI_BOARD_NAME, "8B2F"),
+		}
+	},
+	{
+		.driver_data = &acp6x_card,
+		.matches = {
 			DMI_MATCH(DMI_BOARD_VENDOR, "MECHREVO"),
 			DMI_MATCH(DMI_BOARD_NAME, "MRID6"),
 		}
@@ -381,6 +395,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
 			DMI_MATCH(DMI_PRODUCT_VERSION, "pang12"),
 		}
 	},
+	{
+		.driver_data = &acp6x_card,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "System76"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "pang13"),
+		}
+	},
 	{}
 };
 
diff --git a/sound/soc/codecs/cs35l41-lib.c b/sound/soc/codecs/cs35l41-lib.c
index 4569e4f7cf7e..e9993a39f7d0 100644
--- a/sound/soc/codecs/cs35l41-lib.c
+++ b/sound/soc/codecs/cs35l41-lib.c
@@ -16,6 +16,8 @@
 
 #include <sound/cs35l41.h>
 
+#define CS35L41_FIRMWARE_OLD_VERSION 0x001C00 /* v0.28.0 */
+
 static const struct reg_default cs35l41_reg[] = {
 	{ CS35L41_PWR_CTRL1,			0x00000000 },
 	{ CS35L41_PWR_CTRL2,			0x00000000 },
@@ -1214,7 +1216,7 @@ EXPORT_SYMBOL_GPL(cs35l41_safe_reset);
  * the PLL Lock interrupt, in the IRQ handler.
  */
 int cs35l41_global_enable(struct device *dev, struct regmap *regmap, enum cs35l41_boost_type b_type,
-			  int enable, bool firmware_running)
+			  int enable, struct cs_dsp *dsp)
 {
 	int ret;
 	unsigned int gpio1_func, pad_control, pwr_ctrl1, pwr_ctrl3, int_status, pup_pdn_mask;
@@ -1309,7 +1311,7 @@ int cs35l41_global_enable(struct device *dev, struct regmap *regmap, enum cs35l4
 			}
 			regmap_write(regmap, CS35L41_IRQ1_STATUS1, CS35L41_PUP_DONE_MASK);
 
-			if (firmware_running)
+			if (dsp->running && dsp->fw_id_version > CS35L41_FIRMWARE_OLD_VERSION)
 				ret = cs35l41_set_cspl_mbox_cmd(dev, regmap,
 								CSPL_MBOX_CMD_SPK_OUT_ENABLE);
 			else
diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c
index d0e9128ac6d0..dfb4ce53491b 100644
--- a/sound/soc/codecs/cs35l41.c
+++ b/sound/soc/codecs/cs35l41.c
@@ -519,11 +519,11 @@ static int cs35l41_main_amp_event(struct snd_soc_dapm_widget *w,
 						ARRAY_SIZE(cs35l41_pup_patch));
 
 		ret = cs35l41_global_enable(cs35l41->dev, cs35l41->regmap, cs35l41->hw_cfg.bst_type,
-					    1, cs35l41->dsp.cs_dsp.running);
+					    1, &cs35l41->dsp.cs_dsp);
 		break;
 	case SND_SOC_DAPM_POST_PMD:
 		ret = cs35l41_global_enable(cs35l41->dev, cs35l41->regmap, cs35l41->hw_cfg.bst_type,
-					    0, cs35l41->dsp.cs_dsp.running);
+					    0, &cs35l41->dsp.cs_dsp);
 
 		regmap_multi_reg_write_bypassed(cs35l41->regmap,
 						cs35l41_pdn_patch,
diff --git a/sound/soc/codecs/cs43130.c b/sound/soc/codecs/cs43130.c
index 0b40fdfb1825..d8ec325b9cc9 100644
--- a/sound/soc/codecs/cs43130.c
+++ b/sound/soc/codecs/cs43130.c
@@ -578,7 +578,7 @@ static int cs43130_set_sp_fmt(int dai_id, unsigned int bitwidth_sclk,
 		break;
 	case SND_SOC_DAIFMT_LEFT_J:
 		hi_size = bitwidth_sclk;
-		frm_delay = 2;
+		frm_delay = 0;
 		frm_phase = 1;
 		break;
 	case SND_SOC_DAIFMT_DSP_A:
@@ -1682,7 +1682,7 @@ static ssize_t hpload_dc_r_show(struct device *dev,
 	return cs43130_show_dc(dev, buf, HP_RIGHT);
 }
 
-static u16 const cs43130_ac_freq[CS43130_AC_FREQ] = {
+static const u16 cs43130_ac_freq[CS43130_AC_FREQ] = {
 	24,
 	43,
 	93,
@@ -2362,7 +2362,7 @@ static const struct regmap_config cs43130_regmap = {
 	.use_single_write	= true,
 };
 
-static u16 const cs43130_dc_threshold[CS43130_DC_THRESHOLD] = {
+static const u16 cs43130_dc_threshold[CS43130_DC_THRESHOLD] = {
 	50,
 	120,
 };
diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c
index 4c4405942779..6bc068cdcbe2 100644
--- a/sound/soc/codecs/da7219-aad.c
+++ b/sound/soc/codecs/da7219-aad.c
@@ -696,7 +696,7 @@ static struct da7219_aad_pdata *da7219_aad_fw_to_pdata(struct device *dev)
 		aad_pdata->mic_det_thr =
 			da7219_aad_fw_mic_det_thr(dev, fw_val32);
 	else
-		aad_pdata->mic_det_thr = DA7219_AAD_MIC_DET_THR_500_OHMS;
+		aad_pdata->mic_det_thr = DA7219_AAD_MIC_DET_THR_200_OHMS;
 
 	if (fwnode_property_read_u32(aad_np, "dlg,jack-ins-deb", &fw_val32) >= 0)
 		aad_pdata->jack_ins_deb =
diff --git a/sound/soc/codecs/hdac_hda.c b/sound/soc/codecs/hdac_hda.c
index 355f30779a34..b075689db2dc 100644
--- a/sound/soc/codecs/hdac_hda.c
+++ b/sound/soc/codecs/hdac_hda.c
@@ -132,6 +132,9 @@ static struct snd_soc_dai_driver hdac_hda_dais[] = {
 		.sig_bits = 24,
 	},
 },
+};
+
+static struct snd_soc_dai_driver hdac_hda_hdmi_dais[] = {
 {
 	.id = HDAC_HDMI_0_DAI_ID,
 	.name = "intel-hdmi-hifi1",
@@ -607,8 +610,16 @@ static const struct snd_soc_component_driver hdac_hda_codec = {
 	.endianness		= 1,
 };
 
+static const struct snd_soc_component_driver hdac_hda_hdmi_codec = {
+	.probe			= hdac_hda_codec_probe,
+	.remove			= hdac_hda_codec_remove,
+	.idle_bias_on		= false,
+	.endianness		= 1,
+};
+
 static int hdac_hda_dev_probe(struct hdac_device *hdev)
 {
+	struct hdac_hda_priv *hda_pvt = dev_get_drvdata(&hdev->dev);
 	struct hdac_ext_link *hlink;
 	int ret;
 
@@ -621,9 +632,15 @@ static int hdac_hda_dev_probe(struct hdac_device *hdev)
 	snd_hdac_ext_bus_link_get(hdev->bus, hlink);
 
 	/* ASoC specific initialization */
-	ret = devm_snd_soc_register_component(&hdev->dev,
-					 &hdac_hda_codec, hdac_hda_dais,
-					 ARRAY_SIZE(hdac_hda_dais));
+	if (hda_pvt->need_display_power)
+		ret = devm_snd_soc_register_component(&hdev->dev,
+						&hdac_hda_hdmi_codec, hdac_hda_hdmi_dais,
+						ARRAY_SIZE(hdac_hda_hdmi_dais));
+	else
+		ret = devm_snd_soc_register_component(&hdev->dev,
+						&hdac_hda_codec, hdac_hda_dais,
+						ARRAY_SIZE(hdac_hda_dais));
+
 	if (ret < 0) {
 		dev_err(&hdev->dev, "failed to register HDA codec %d\n", ret);
 		return ret;
diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c
index 82f9873ffada..124c2e144f33 100644
--- a/sound/soc/codecs/lpass-tx-macro.c
+++ b/sound/soc/codecs/lpass-tx-macro.c
@@ -2021,6 +2021,11 @@ static int tx_macro_probe(struct platform_device *pdev)
 
 	tx->dev = dev;
 
+	/* Set active_decimator default value */
+	tx->active_decimator[TX_MACRO_AIF1_CAP] = -1;
+	tx->active_decimator[TX_MACRO_AIF2_CAP] = -1;
+	tx->active_decimator[TX_MACRO_AIF3_CAP] = -1;
+
 	/* set MCLK and NPL rates */
 	clk_set_rate(tx->mclk, MCLK_FREQ);
 	clk_set_rate(tx->npl, MCLK_FREQ);
diff --git a/sound/soc/codecs/nau8822.c b/sound/soc/codecs/nau8822.c
index ff3024899f45..7199d734c79f 100644
--- a/sound/soc/codecs/nau8822.c
+++ b/sound/soc/codecs/nau8822.c
@@ -184,6 +184,7 @@ static int nau8822_eq_get(struct snd_kcontrol *kcontrol,
 	struct soc_bytes_ext *params = (void *)kcontrol->private_value;
 	int i, reg;
 	u16 reg_val, *val;
+	__be16 tmp;
 
 	val = (u16 *)ucontrol->value.bytes.data;
 	reg = NAU8822_REG_EQ1;
@@ -192,8 +193,8 @@ static int nau8822_eq_get(struct snd_kcontrol *kcontrol,
 		/* conversion of 16-bit integers between native CPU format
 		 * and big endian format
 		 */
-		reg_val = cpu_to_be16(reg_val);
-		memcpy(val + i, &reg_val, sizeof(reg_val));
+		tmp = cpu_to_be16(reg_val);
+		memcpy(val + i, &tmp, sizeof(tmp));
 	}
 
 	return 0;
@@ -216,6 +217,7 @@ static int nau8822_eq_put(struct snd_kcontrol *kcontrol,
 	void *data;
 	u16 *val, value;
 	int i, reg, ret;
+	__be16 *tmp;
 
 	data = kmemdup(ucontrol->value.bytes.data,
 		params->max, GFP_KERNEL | GFP_DMA);
@@ -228,7 +230,8 @@ static int nau8822_eq_put(struct snd_kcontrol *kcontrol,
 		/* conversion of 16-bit integers between native CPU format
 		 * and big endian format
 		 */
-		value = be16_to_cpu(*(val + i));
+		tmp = (__be16 *)(val + i);
+		value = be16_to_cpup(tmp);
 		ret = snd_soc_component_write(component, reg + i, value);
 		if (ret) {
 			dev_err(component->dev,
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 7938b52d741d..a0d01d71d8b5 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -448,6 +448,7 @@ struct rt5645_priv {
 	struct regulator_bulk_data supplies[ARRAY_SIZE(rt5645_supply_names)];
 	struct rt5645_eq_param_s *eq_param;
 	struct timer_list btn_check_timer;
+	struct mutex jd_mutex;
 
 	int codec_type;
 	int sysclk;
@@ -3193,6 +3194,8 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse
 				rt5645_enable_push_button_irq(component, true);
 			}
 		} else {
+			if (rt5645->en_button_func)
+				rt5645_enable_push_button_irq(component, false);
 			snd_soc_dapm_disable_pin(dapm, "Mic Det Power");
 			snd_soc_dapm_sync(dapm);
 			rt5645->jack_type = SND_JACK_HEADPHONE;
@@ -3295,6 +3298,8 @@ static void rt5645_jack_detect_work(struct work_struct *work)
 	if (!rt5645->component)
 		return;
 
+	mutex_lock(&rt5645->jd_mutex);
+
 	switch (rt5645->pdata.jd_mode) {
 	case 0: /* Not using rt5645 JD */
 		if (rt5645->gpiod_hp_det) {
@@ -3321,7 +3326,7 @@ static void rt5645_jack_detect_work(struct work_struct *work)
 
 	if (!val && (rt5645->jack_type == 0)) { /* jack in */
 		report = rt5645_jack_detect(rt5645->component, 1);
-	} else if (!val && rt5645->jack_type != 0) {
+	} else if (!val && rt5645->jack_type == SND_JACK_HEADSET) {
 		/* for push button and jack out */
 		btn_type = 0;
 		if (snd_soc_component_read(rt5645->component, RT5645_INT_IRQ_ST) & 0x4) {
@@ -3377,6 +3382,8 @@ static void rt5645_jack_detect_work(struct work_struct *work)
 		rt5645_jack_detect(rt5645->component, 0);
 	}
 
+	mutex_unlock(&rt5645->jd_mutex);
+
 	snd_soc_jack_report(rt5645->hp_jack, report, SND_JACK_HEADPHONE);
 	snd_soc_jack_report(rt5645->mic_jack, report, SND_JACK_MICROPHONE);
 	if (rt5645->en_button_func)
@@ -4150,6 +4157,7 @@ static int rt5645_i2c_probe(struct i2c_client *i2c)
 	}
 	timer_setup(&rt5645->btn_check_timer, rt5645_btn_check_callback, 0);
 
+	mutex_init(&rt5645->jd_mutex);
 	INIT_DELAYED_WORK(&rt5645->jack_detect_work, rt5645_jack_detect_work);
 	INIT_DELAYED_WORK(&rt5645->rcclock_work, rt5645_rcclock_work);
 
diff --git a/sound/soc/codecs/wm8974.c b/sound/soc/codecs/wm8974.c
index 044b6f604c09..260bac695b20 100644
--- a/sound/soc/codecs/wm8974.c
+++ b/sound/soc/codecs/wm8974.c
@@ -186,7 +186,7 @@ SOC_DAPM_SINGLE("PCM Playback Switch", WM8974_MONOMIX, 0, 1, 0),
 
 /* Boost mixer */
 static const struct snd_kcontrol_new wm8974_boost_mixer[] = {
-SOC_DAPM_SINGLE("Aux Switch", WM8974_INPPGA, 6, 1, 1),
+SOC_DAPM_SINGLE("PGA Switch", WM8974_INPPGA, 6, 1, 1),
 };
 
 /* Input PGA */
@@ -246,8 +246,8 @@ static const struct snd_soc_dapm_route wm8974_dapm_routes[] = {
 
 	/* Boost Mixer */
 	{"ADC", NULL, "Boost Mixer"},
-	{"Boost Mixer", "Aux Switch", "Aux Input"},
-	{"Boost Mixer", NULL, "Input PGA"},
+	{"Boost Mixer", NULL, "Aux Input"},
+	{"Boost Mixer", "PGA Switch", "Input PGA"},
 	{"Boost Mixer", NULL, "MICP"},
 
 	/* Input PGA */
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 236b12b69ae5..c01e31175015 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -1451,12 +1451,12 @@ static int wm_adsp_buffer_populate(struct wm_adsp_compr_buf *buf)
 		ret = wm_adsp_buffer_read(buf, caps->region_defs[i].base_offset,
 					  &region->base_addr);
 		if (ret < 0)
-			return ret;
+			goto err;
 
 		ret = wm_adsp_buffer_read(buf, caps->region_defs[i].size_offset,
 					  &offset);
 		if (ret < 0)
-			return ret;
+			goto err;
 
 		region->cumulative_size = offset;
 
@@ -1467,6 +1467,10 @@ static int wm_adsp_buffer_populate(struct wm_adsp_compr_buf *buf)
 	}
 
 	return 0;
+
+err:
+	kfree(buf->regions);
+	return ret;
 }
 
 static void wm_adsp_buffer_clear(struct wm_adsp_compr_buf *buf)
diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index 725c530a3636..be342ee03fb9 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -360,6 +360,7 @@ config SND_SOC_IMX_HDMI
 config SND_SOC_IMX_RPMSG
 	tristate "SoC Audio support for i.MX boards with rpmsg"
 	depends on RPMSG
+	depends on OF && I2C
 	select SND_SOC_IMX_PCM_RPMSG
 	select SND_SOC_IMX_AUDIO_RPMSG
 	help
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index 79e7c6b98a75..32bbe5056a63 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -673,6 +673,20 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream,
 			   FSL_SAI_CR3_TRCE_MASK,
 			   FSL_SAI_CR3_TRCE((dl_cfg[dl_cfg_idx].mask[tx] & trce_mask)));
 
+	/*
+	 * When the TERE and FSD_MSTR enabled before configuring the word width
+	 * There will be no frame sync clock issue, because word width impact
+	 * the generation of frame sync clock.
+	 *
+	 * TERE enabled earlier only for i.MX8MP case for the hardware limitation,
+	 * We need to disable FSD_MSTR before configuring word width, then enable
+	 * FSD_MSTR bit for this specific case.
+	 */
+	if (sai->soc_data->mclk_with_tere && sai->mclk_direction_output &&
+	    !sai->is_consumer_mode)
+		regmap_update_bits(sai->regmap, FSL_SAI_xCR4(tx, ofs),
+				   FSL_SAI_CR4_FSD_MSTR, 0);
+
 	regmap_update_bits(sai->regmap, FSL_SAI_xCR4(tx, ofs),
 			   FSL_SAI_CR4_SYWD_MASK | FSL_SAI_CR4_FRSZ_MASK |
 			   FSL_SAI_CR4_CHMOD_MASK,
@@ -680,6 +694,13 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream,
 	regmap_update_bits(sai->regmap, FSL_SAI_xCR5(tx, ofs),
 			   FSL_SAI_CR5_WNW_MASK | FSL_SAI_CR5_W0W_MASK |
 			   FSL_SAI_CR5_FBT_MASK, val_cr5);
+
+	/* Enable FSD_MSTR after configuring word width */
+	if (sai->soc_data->mclk_with_tere && sai->mclk_direction_output &&
+	    !sai->is_consumer_mode)
+		regmap_update_bits(sai->regmap, FSL_SAI_xCR4(tx, ofs),
+				   FSL_SAI_CR4_FSD_MSTR, FSL_SAI_CR4_FSD_MSTR);
+
 	regmap_write(sai->regmap, FSL_SAI_xMR(tx),
 		     ~0UL - ((1 << min(channels, slots)) - 1));
 
diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c
index fa0a15263c66..f0fb33d719c2 100644
--- a/sound/soc/fsl/fsl_xcvr.c
+++ b/sound/soc/fsl/fsl_xcvr.c
@@ -358,7 +358,7 @@ static int fsl_xcvr_en_aud_pll(struct fsl_xcvr *xcvr, u32 freq)
 	struct device *dev = &xcvr->pdev->dev;
 	int ret;
 
-	freq = xcvr->soc_data->spdif_only ? freq / 10 : freq;
+	freq = xcvr->soc_data->spdif_only ? freq / 5 : freq;
 	clk_disable_unprepare(xcvr->phy_clk);
 	ret = clk_set_rate(xcvr->phy_clk, freq);
 	if (ret < 0) {
@@ -409,11 +409,21 @@ static int fsl_xcvr_prepare(struct snd_pcm_substream *substream,
 	bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
 	u32 m_ctl = 0, v_ctl = 0;
 	u32 r = substream->runtime->rate, ch = substream->runtime->channels;
-	u32 fout = 32 * r * ch * 10 * 2;
+	u32 fout = 32 * r * ch * 10;
 	int ret = 0;
 
 	switch (xcvr->mode) {
 	case FSL_XCVR_MODE_SPDIF:
+		if (xcvr->soc_data->spdif_only && tx) {
+			ret = regmap_update_bits(xcvr->regmap, FSL_XCVR_TX_DPTH_CTRL_SET,
+						 FSL_XCVR_TX_DPTH_CTRL_BYPASS_FEM,
+						 FSL_XCVR_TX_DPTH_CTRL_BYPASS_FEM);
+			if (ret < 0) {
+				dev_err(dai->dev, "Failed to set bypass fem: %d\n", ret);
+				return ret;
+			}
+		}
+		fallthrough;
 	case FSL_XCVR_MODE_ARC:
 		if (tx) {
 			ret = fsl_xcvr_en_aud_pll(xcvr, fout);
diff --git a/sound/soc/intel/boards/skl_hda_dsp_generic.c b/sound/soc/intel/boards/skl_hda_dsp_generic.c
index 6c6ef63cd5d9..6e172719c979 100644
--- a/sound/soc/intel/boards/skl_hda_dsp_generic.c
+++ b/sound/soc/intel/boards/skl_hda_dsp_generic.c
@@ -154,6 +154,8 @@ static int skl_hda_fill_card_info(struct snd_soc_acpi_mach_params *mach_params)
 		card->dapm_widgets = skl_hda_widgets;
 		card->num_dapm_widgets = ARRAY_SIZE(skl_hda_widgets);
 		if (!ctx->idisp_codec) {
+			card->dapm_routes = &skl_hda_map[IDISP_ROUTE_COUNT];
+			num_route -= IDISP_ROUTE_COUNT;
 			for (i = 0; i < IDISP_DAI_COUNT; i++) {
 				skl_hda_be_dai_links[i].codecs = &snd_soc_dummy_dlc;
 				skl_hda_be_dai_links[i].num_codecs = 1;
diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c
index 3312ad8a563b..4e4284729773 100644
--- a/sound/soc/intel/boards/sof_sdw.c
+++ b/sound/soc/intel/boards/sof_sdw.c
@@ -1546,7 +1546,7 @@ static int sof_card_dai_links_create(struct snd_soc_card *card)
 {
 	struct device *dev = card->dev;
 	struct snd_soc_acpi_mach *mach = dev_get_platdata(card->dev);
-	int sdw_be_num = 0, ssp_num = 0, dmic_num = 0, hdmi_num = 0, bt_num = 0;
+	int sdw_be_num = 0, ssp_num = 0, dmic_num = 0, bt_num = 0;
 	struct mc_private *ctx = snd_soc_card_get_drvdata(card);
 	struct snd_soc_acpi_mach_params *mach_params = &mach->mach_params;
 	const struct snd_soc_acpi_link_adr *adr_link = mach_params->links;
@@ -1564,6 +1564,7 @@ static int sof_card_dai_links_create(struct snd_soc_card *card)
 	char *codec_name, *codec_dai_name;
 	int i, j, be_id = 0;
 	int codec_index;
+	int hdmi_num;
 	int ret;
 
 	ret = get_dailink_info(dev, adr_link, &sdw_be_num, &codec_conf_num);
@@ -1584,14 +1585,13 @@ static int sof_card_dai_links_create(struct snd_soc_card *card)
 		ssp_num = hweight_long(ssp_mask);
 	}
 
-	if (mach_params->codec_mask & IDISP_CODEC_MASK) {
+	if (mach_params->codec_mask & IDISP_CODEC_MASK)
 		ctx->hdmi.idisp_codec = true;
 
-		if (sof_sdw_quirk & SOF_SDW_TGL_HDMI)
-			hdmi_num = SOF_TGL_HDMI_COUNT;
-		else
-			hdmi_num = SOF_PRE_TGL_HDMI_COUNT;
-	}
+	if (sof_sdw_quirk & SOF_SDW_TGL_HDMI)
+		hdmi_num = SOF_TGL_HDMI_COUNT;
+	else
+		hdmi_num = SOF_PRE_TGL_HDMI_COUNT;
 
 	/* enable dmic01 & dmic16k */
 	if (sof_sdw_quirk & SOF_SDW_PCH_DMIC || mach_params->dmic_num)
@@ -1601,7 +1601,8 @@ static int sof_card_dai_links_create(struct snd_soc_card *card)
 		bt_num = 1;
 
 	dev_dbg(dev, "sdw %d, ssp %d, dmic %d, hdmi %d, bt: %d\n",
-		sdw_be_num, ssp_num, dmic_num, hdmi_num, bt_num);
+		sdw_be_num, ssp_num, dmic_num,
+		ctx->hdmi.idisp_codec ? hdmi_num : 0, bt_num);
 
 	/* allocate BE dailinks */
 	num_links = sdw_be_num + ssp_num + dmic_num + hdmi_num + bt_num;
diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c
index d0c02e8a6785..174aae6e0398 100644
--- a/sound/soc/intel/skylake/skl-pcm.c
+++ b/sound/soc/intel/skylake/skl-pcm.c
@@ -240,8 +240,10 @@ static int skl_pcm_open(struct snd_pcm_substream *substream,
 	snd_pcm_set_sync(substream);
 
 	mconfig = skl_tplg_fe_get_cpr_module(dai, substream->stream);
-	if (!mconfig)
+	if (!mconfig) {
+		kfree(dma_params);
 		return -EINVAL;
+	}
 
 	skl_tplg_d0i3_get(skl, mconfig->d0i3_caps);
 
@@ -1462,6 +1464,7 @@ int skl_platform_register(struct device *dev)
 		dais = krealloc(skl->dais, sizeof(skl_fe_dai) +
 				sizeof(skl_platform_dai), GFP_KERNEL);
 		if (!dais) {
+			kfree(skl->dais);
 			ret = -ENOMEM;
 			goto err;
 		}
@@ -1474,8 +1477,10 @@ int skl_platform_register(struct device *dev)
 
 	ret = devm_snd_soc_register_component(dev, &skl_component,
 					 skl->dais, num_dais);
-	if (ret)
+	if (ret) {
+		kfree(skl->dais);
 		dev_err(dev, "soc component registration failed %d\n", ret);
+	}
 err:
 	return ret;
 }
diff --git a/sound/soc/intel/skylake/skl-sst-ipc.c b/sound/soc/intel/skylake/skl-sst-ipc.c
index 7a425271b08b..fd9624ad5f72 100644
--- a/sound/soc/intel/skylake/skl-sst-ipc.c
+++ b/sound/soc/intel/skylake/skl-sst-ipc.c
@@ -1003,8 +1003,10 @@ int skl_ipc_get_large_config(struct sst_generic_ipc *ipc,
 
 	reply.size = (reply.header >> 32) & IPC_DATA_OFFSET_SZ_MASK;
 	buf = krealloc(reply.data, reply.size, GFP_KERNEL);
-	if (!buf)
+	if (!buf) {
+		kfree(reply.data);
 		return -ENOMEM;
+	}
 	*payload = buf;
 	*bytes = reply.size;
 
diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c
index d93b18f07be5..39cb0b889aff 100644
--- a/sound/soc/qcom/sc8280xp.c
+++ b/sound/soc/qcom/sc8280xp.c
@@ -27,6 +27,23 @@ struct sc8280xp_snd_data {
 static int sc8280xp_snd_init(struct snd_soc_pcm_runtime *rtd)
 {
 	struct sc8280xp_snd_data *data = snd_soc_card_get_drvdata(rtd->card);
+	struct snd_soc_dai *cpu_dai = snd_soc_rtd_to_cpu(rtd, 0);
+	struct snd_soc_card *card = rtd->card;
+
+	switch (cpu_dai->id) {
+	case WSA_CODEC_DMA_RX_0:
+	case WSA_CODEC_DMA_RX_1:
+		/*
+		 * set limit of 0dB on Digital Volume for Speakers,
+		 * this can prevent damage of speakers to some extent without
+		 * active speaker protection
+		 */
+		snd_soc_limit_volume(card, "WSA_RX0 Digital Volume", 84);
+		snd_soc_limit_volume(card, "WSA_RX1 Digital Volume", 84);
+		break;
+	default:
+		break;
+	}
 
 	return qcom_snd_wcd_jack_setup(rtd, &data->jack, &data->jack_setup);
 }
diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index 55b009d3c681..2d25748ca706 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -661,7 +661,7 @@ int snd_soc_limit_volume(struct snd_soc_card *card,
 	kctl = snd_soc_card_get_kcontrol(card, name);
 	if (kctl) {
 		struct soc_mixer_control *mc = (struct soc_mixer_control *)kctl->private_value;
-		if (max <= mc->max) {
+		if (max <= mc->max - mc->min) {
 			mc->platform_max = max;
 			ret = 0;
 		}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 323e4d7b6adf..f6d1b2e11795 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -704,11 +704,6 @@ static int soc_pcm_clean(struct snd_soc_pcm_runtime *rtd,
 			if (snd_soc_dai_active(dai) == 0 &&
 			    (dai->rate || dai->channels || dai->sample_bits))
 				soc_pcm_set_dai_params(dai, NULL);
-
-			if (snd_soc_dai_stream_active(dai, substream->stream) ==  0) {
-				if (dai->driver->ops && !dai->driver->ops->mute_unmute_on_trigger)
-					snd_soc_dai_digital_mute(dai, 1, substream->stream);
-			}
 		}
 	}
 
@@ -947,8 +942,10 @@ static int soc_pcm_hw_clean(struct snd_soc_pcm_runtime *rtd,
 		if (snd_soc_dai_active(dai) == 1)
 			soc_pcm_set_dai_params(dai, NULL);
 
-		if (snd_soc_dai_stream_active(dai, substream->stream) == 1)
-			snd_soc_dai_digital_mute(dai, 1, substream->stream);
+		if (snd_soc_dai_stream_active(dai, substream->stream) == 1) {
+			if (dai->driver->ops && !dai->driver->ops->mute_unmute_on_trigger)
+				snd_soc_dai_digital_mute(dai, 1, substream->stream);
+		}
 	}
 
 	/* run the stream event */
diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c
index ba4ef290b634..2c7a5e7a364c 100644
--- a/sound/soc/sof/ipc3-topology.c
+++ b/sound/soc/sof/ipc3-topology.c
@@ -493,6 +493,7 @@ static int sof_ipc3_widget_setup_comp_mixer(struct snd_sof_widget *swidget)
 static int sof_ipc3_widget_setup_comp_pipeline(struct snd_sof_widget *swidget)
 {
 	struct snd_soc_component *scomp = swidget->scomp;
+	struct snd_sof_pipeline *spipe = swidget->spipe;
 	struct sof_ipc_pipe_new *pipeline;
 	struct snd_sof_widget *comp_swidget;
 	int ret;
@@ -545,6 +546,7 @@ static int sof_ipc3_widget_setup_comp_pipeline(struct snd_sof_widget *swidget)
 		swidget->dynamic_pipeline_widget);
 
 	swidget->core = pipeline->core;
+	spipe->core_mask |= BIT(pipeline->core);
 
 	return 0;
 
diff --git a/sound/soc/sof/ipc4-control.c b/sound/soc/sof/ipc4-control.c
index 938efaceb81c..b4cdcec33e12 100644
--- a/sound/soc/sof/ipc4-control.c
+++ b/sound/soc/sof/ipc4-control.c
@@ -89,7 +89,7 @@ sof_ipc4_set_volume_data(struct snd_sof_dev *sdev, struct snd_sof_widget *swidge
 	struct sof_ipc4_control_data *cdata = scontrol->ipc_control_data;
 	struct sof_ipc4_gain *gain = swidget->private;
 	struct sof_ipc4_msg *msg = &cdata->msg;
-	struct sof_ipc4_gain_data data;
+	struct sof_ipc4_gain_params params;
 	bool all_channels_equal = true;
 	u32 value;
 	int ret, i;
@@ -109,20 +109,20 @@ sof_ipc4_set_volume_data(struct snd_sof_dev *sdev, struct snd_sof_widget *swidge
 	 */
 	for (i = 0; i < scontrol->num_channels; i++) {
 		if (all_channels_equal) {
-			data.channels = SOF_IPC4_GAIN_ALL_CHANNELS_MASK;
-			data.init_val = cdata->chanv[0].value;
+			params.channels = SOF_IPC4_GAIN_ALL_CHANNELS_MASK;
+			params.init_val = cdata->chanv[0].value;
 		} else {
-			data.channels = cdata->chanv[i].channel;
-			data.init_val = cdata->chanv[i].value;
+			params.channels = cdata->chanv[i].channel;
+			params.init_val = cdata->chanv[i].value;
 		}
 
 		/* set curve type and duration from topology */
-		data.curve_duration_l = gain->data.curve_duration_l;
-		data.curve_duration_h = gain->data.curve_duration_h;
-		data.curve_type = gain->data.curve_type;
+		params.curve_duration_l = gain->data.params.curve_duration_l;
+		params.curve_duration_h = gain->data.params.curve_duration_h;
+		params.curve_type = gain->data.params.curve_type;
 
-		msg->data_ptr = &data;
-		msg->data_size = sizeof(data);
+		msg->data_ptr = &params;
+		msg->data_size = sizeof(params);
 
 		ret = sof_ipc4_set_get_kcontrol_data(scontrol, true, lock);
 		msg->data_ptr = NULL;
diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index b24a64377f68..e012b6e166ac 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -130,18 +130,18 @@ static const struct sof_topology_token comp_ext_tokens[] = {
 
 static const struct sof_topology_token gain_tokens[] = {
 	{SOF_TKN_GAIN_RAMP_TYPE, SND_SOC_TPLG_TUPLE_TYPE_WORD,
-		get_token_u32, offsetof(struct sof_ipc4_gain_data, curve_type)},
+		get_token_u32, offsetof(struct sof_ipc4_gain_params, curve_type)},
 	{SOF_TKN_GAIN_RAMP_DURATION,
 		SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
-		offsetof(struct sof_ipc4_gain_data, curve_duration_l)},
+		offsetof(struct sof_ipc4_gain_params, curve_duration_l)},
 	{SOF_TKN_GAIN_VAL, SND_SOC_TPLG_TUPLE_TYPE_WORD,
-		get_token_u32, offsetof(struct sof_ipc4_gain_data, init_val)},
+		get_token_u32, offsetof(struct sof_ipc4_gain_params, init_val)},
 };
 
 /* SRC */
 static const struct sof_topology_token src_tokens[] = {
 	{SOF_TKN_SRC_RATE_OUT, SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
-		offsetof(struct sof_ipc4_src, sink_rate)},
+		offsetof(struct sof_ipc4_src_data, sink_rate)},
 };
 
 static const struct sof_token_info ipc4_token_list[SOF_TOKEN_COUNT] = {
@@ -656,6 +656,7 @@ static int sof_ipc4_widget_setup_comp_pipeline(struct snd_sof_widget *swidget)
 {
 	struct snd_soc_component *scomp = swidget->scomp;
 	struct sof_ipc4_pipeline *pipeline;
+	struct snd_sof_pipeline *spipe = swidget->spipe;
 	int ret;
 
 	pipeline = kzalloc(sizeof(*pipeline), GFP_KERNEL);
@@ -670,6 +671,7 @@ static int sof_ipc4_widget_setup_comp_pipeline(struct snd_sof_widget *swidget)
 	}
 
 	swidget->core = pipeline->core_id;
+	spipe->core_mask |= BIT(pipeline->core_id);
 
 	if (pipeline->use_chain_dma) {
 		dev_dbg(scomp->dev, "Set up chain DMA for %s\n", swidget->widget->name);
@@ -718,15 +720,15 @@ static int sof_ipc4_widget_setup_comp_pga(struct snd_sof_widget *swidget)
 
 	swidget->private = gain;
 
-	gain->data.channels = SOF_IPC4_GAIN_ALL_CHANNELS_MASK;
-	gain->data.init_val = SOF_IPC4_VOL_ZERO_DB;
+	gain->data.params.channels = SOF_IPC4_GAIN_ALL_CHANNELS_MASK;
+	gain->data.params.init_val = SOF_IPC4_VOL_ZERO_DB;
 
-	ret = sof_ipc4_get_audio_fmt(scomp, swidget, &gain->available_fmt, &gain->base_config);
+	ret = sof_ipc4_get_audio_fmt(scomp, swidget, &gain->available_fmt, &gain->data.base_config);
 	if (ret)
 		goto err;
 
-	ret = sof_update_ipc_object(scomp, &gain->data, SOF_GAIN_TOKENS, swidget->tuples,
-				    swidget->num_tuples, sizeof(gain->data), 1);
+	ret = sof_update_ipc_object(scomp, &gain->data.params, SOF_GAIN_TOKENS,
+				    swidget->tuples, swidget->num_tuples, sizeof(gain->data), 1);
 	if (ret) {
 		dev_err(scomp->dev, "Parsing gain tokens failed\n");
 		goto err;
@@ -734,8 +736,8 @@ static int sof_ipc4_widget_setup_comp_pga(struct snd_sof_widget *swidget)
 
 	dev_dbg(scomp->dev,
 		"pga widget %s: ramp type: %d, ramp duration %d, initial gain value: %#x\n",
-		swidget->widget->name, gain->data.curve_type, gain->data.curve_duration_l,
-		gain->data.init_val);
+		swidget->widget->name, gain->data.params.curve_type,
+		gain->data.params.curve_duration_l, gain->data.params.init_val);
 
 	ret = sof_ipc4_widget_setup_msg(swidget, &gain->msg);
 	if (ret)
@@ -797,6 +799,7 @@ err:
 static int sof_ipc4_widget_setup_comp_src(struct snd_sof_widget *swidget)
 {
 	struct snd_soc_component *scomp = swidget->scomp;
+	struct snd_sof_pipeline *spipe = swidget->spipe;
 	struct sof_ipc4_src *src;
 	int ret;
 
@@ -808,18 +811,21 @@ static int sof_ipc4_widget_setup_comp_src(struct snd_sof_widget *swidget)
 
 	swidget->private = src;
 
-	ret = sof_ipc4_get_audio_fmt(scomp, swidget, &src->available_fmt, &src->base_config);
+	ret = sof_ipc4_get_audio_fmt(scomp, swidget, &src->available_fmt,
+				     &src->data.base_config);
 	if (ret)
 		goto err;
 
-	ret = sof_update_ipc_object(scomp, src, SOF_SRC_TOKENS, swidget->tuples,
+	ret = sof_update_ipc_object(scomp, &src->data, SOF_SRC_TOKENS, swidget->tuples,
 				    swidget->num_tuples, sizeof(*src), 1);
 	if (ret) {
 		dev_err(scomp->dev, "Parsing SRC tokens failed\n");
 		goto err;
 	}
 
-	dev_dbg(scomp->dev, "SRC sink rate %d\n", src->sink_rate);
+	spipe->core_mask |= BIT(swidget->core);
+
+	dev_dbg(scomp->dev, "SRC sink rate %d\n", src->data.sink_rate);
 
 	ret = sof_ipc4_widget_setup_msg(swidget, &src->msg);
 	if (ret)
@@ -864,6 +870,7 @@ static int sof_ipc4_widget_setup_comp_process(struct snd_sof_widget *swidget)
 {
 	struct snd_soc_component *scomp = swidget->scomp;
 	struct sof_ipc4_fw_module *fw_module;
+	struct snd_sof_pipeline *spipe = swidget->spipe;
 	struct sof_ipc4_process *process;
 	void *cfg;
 	int ret;
@@ -920,6 +927,9 @@ static int sof_ipc4_widget_setup_comp_process(struct snd_sof_widget *swidget)
 
 	sof_ipc4_widget_update_kcontrol_module_id(swidget);
 
+	/* set pipeline core mask to keep track of the core the module is scheduled to run on */
+	spipe->core_mask |= BIT(swidget->core);
+
 	return 0;
 free_base_cfg_ext:
 	kfree(process->base_config_ext);
@@ -1816,7 +1826,7 @@ static int sof_ipc4_prepare_gain_module(struct snd_sof_widget *swidget,
 	u32 out_ref_rate, out_ref_channels, out_ref_valid_bits;
 	int ret;
 
-	ret = sof_ipc4_init_input_audio_fmt(sdev, swidget, &gain->base_config,
+	ret = sof_ipc4_init_input_audio_fmt(sdev, swidget, &gain->data.base_config,
 					    pipeline_params, available_fmt);
 	if (ret < 0)
 		return ret;
@@ -1826,7 +1836,7 @@ static int sof_ipc4_prepare_gain_module(struct snd_sof_widget *swidget,
 	out_ref_channels = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(in_fmt->fmt_cfg);
 	out_ref_valid_bits = SOF_IPC4_AUDIO_FORMAT_CFG_V_BIT_DEPTH(in_fmt->fmt_cfg);
 
-	ret = sof_ipc4_init_output_audio_fmt(sdev, &gain->base_config, available_fmt,
+	ret = sof_ipc4_init_output_audio_fmt(sdev, &gain->data.base_config, available_fmt,
 					     out_ref_rate, out_ref_channels, out_ref_valid_bits);
 	if (ret < 0) {
 		dev_err(sdev->dev, "Failed to initialize output format for %s",
@@ -1835,7 +1845,7 @@ static int sof_ipc4_prepare_gain_module(struct snd_sof_widget *swidget,
 	}
 
 	/* update pipeline memory usage */
-	sof_ipc4_update_resource_usage(sdev, swidget, &gain->base_config);
+	sof_ipc4_update_resource_usage(sdev, swidget, &gain->data.base_config);
 
 	return 0;
 }
@@ -1891,7 +1901,7 @@ static int sof_ipc4_prepare_src_module(struct snd_sof_widget *swidget,
 	u32 out_ref_rate, out_ref_channels, out_ref_valid_bits;
 	int output_format_index, input_format_index;
 
-	input_format_index = sof_ipc4_init_input_audio_fmt(sdev, swidget, &src->base_config,
+	input_format_index = sof_ipc4_init_input_audio_fmt(sdev, swidget, &src->data.base_config,
 							   pipeline_params, available_fmt);
 	if (input_format_index < 0)
 		return input_format_index;
@@ -1921,7 +1931,7 @@ static int sof_ipc4_prepare_src_module(struct snd_sof_widget *swidget,
 	 */
 	out_ref_rate = params_rate(fe_params);
 
-	output_format_index = sof_ipc4_init_output_audio_fmt(sdev, &src->base_config,
+	output_format_index = sof_ipc4_init_output_audio_fmt(sdev, &src->data.base_config,
 							     available_fmt, out_ref_rate,
 							     out_ref_channels, out_ref_valid_bits);
 	if (output_format_index < 0) {
@@ -1931,10 +1941,10 @@ static int sof_ipc4_prepare_src_module(struct snd_sof_widget *swidget,
 	}
 
 	/* update pipeline memory usage */
-	sof_ipc4_update_resource_usage(sdev, swidget, &src->base_config);
+	sof_ipc4_update_resource_usage(sdev, swidget, &src->data.base_config);
 
 	out_audio_fmt = &available_fmt->output_pin_fmts[output_format_index].audio_fmt;
-	src->sink_rate = out_audio_fmt->sampling_frequency;
+	src->data.sink_rate = out_audio_fmt->sampling_frequency;
 
 	/* update pipeline_params for sink widgets */
 	return sof_ipc4_update_hw_params(sdev, pipeline_params, out_audio_fmt);
@@ -2314,9 +2324,8 @@ static int sof_ipc4_widget_setup(struct snd_sof_dev *sdev, struct snd_sof_widget
 	{
 		struct sof_ipc4_gain *gain = swidget->private;
 
-		ipc_size = sizeof(struct sof_ipc4_base_module_cfg) +
-			   sizeof(struct sof_ipc4_gain_data);
-		ipc_data = gain;
+		ipc_size = sizeof(gain->data);
+		ipc_data = &gain->data;
 
 		msg = &gain->msg;
 		break;
@@ -2335,8 +2344,8 @@ static int sof_ipc4_widget_setup(struct snd_sof_dev *sdev, struct snd_sof_widget
 	{
 		struct sof_ipc4_src *src = swidget->private;
 
-		ipc_size = sizeof(struct sof_ipc4_base_module_cfg) + sizeof(src->sink_rate);
-		ipc_data = src;
+		ipc_size = sizeof(src->data);
+		ipc_data = &src->data;
 
 		msg = &src->msg;
 		break;
diff --git a/sound/soc/sof/ipc4-topology.h b/sound/soc/sof/ipc4-topology.h
index 0a57b8ab3e08..dce174a190dd 100644
--- a/sound/soc/sof/ipc4-topology.h
+++ b/sound/soc/sof/ipc4-topology.h
@@ -361,7 +361,7 @@ struct sof_ipc4_control_msg_payload {
 } __packed;
 
 /**
- * struct sof_ipc4_gain_data - IPC gain blob
+ * struct sof_ipc4_gain_params - IPC gain parameters
  * @channels: Channels
  * @init_val: Initial value
  * @curve_type: Curve type
@@ -369,24 +369,32 @@ struct sof_ipc4_control_msg_payload {
  * @curve_duration_l: Curve duration low part
  * @curve_duration_h: Curve duration high part
  */
-struct sof_ipc4_gain_data {
+struct sof_ipc4_gain_params {
 	uint32_t channels;
 	uint32_t init_val;
 	uint32_t curve_type;
 	uint32_t reserved;
 	uint32_t curve_duration_l;
 	uint32_t curve_duration_h;
-} __aligned(8);
+} __packed __aligned(4);
 
 /**
- * struct sof_ipc4_gain - gain config data
+ * struct sof_ipc4_gain_data - IPC gain init blob
  * @base_config: IPC base config data
+ * @params: Initial parameters for the gain module
+ */
+struct sof_ipc4_gain_data {
+	struct sof_ipc4_base_module_cfg base_config;
+	struct sof_ipc4_gain_params params;
+} __packed __aligned(4);
+
+/**
+ * struct sof_ipc4_gain - gain config data
  * @data: IPC gain blob
  * @available_fmt: Available audio format
  * @msg: message structure for gain
  */
 struct sof_ipc4_gain {
-	struct sof_ipc4_base_module_cfg base_config;
 	struct sof_ipc4_gain_data data;
 	struct sof_ipc4_available_audio_format available_fmt;
 	struct sof_ipc4_msg msg;
@@ -404,16 +412,24 @@ struct sof_ipc4_mixer {
 	struct sof_ipc4_msg msg;
 };
 
-/**
- * struct sof_ipc4_src SRC config data
+/*
+ * struct sof_ipc4_src_data - IPC data for SRC
  * @base_config: IPC base config data
  * @sink_rate: Output rate for sink module
+ */
+struct sof_ipc4_src_data {
+	struct sof_ipc4_base_module_cfg base_config;
+	uint32_t sink_rate;
+} __packed __aligned(4);
+
+/**
+ * struct sof_ipc4_src - SRC config data
+ * @data: IPC base config data
  * @available_fmt: Available audio format
  * @msg: IPC4 message struct containing header and data info
  */
 struct sof_ipc4_src {
-	struct sof_ipc4_base_module_cfg base_config;
-	uint32_t sink_rate;
+	struct sof_ipc4_src_data data;
 	struct sof_ipc4_available_audio_format available_fmt;
 	struct sof_ipc4_msg msg;
 };
diff --git a/sound/soc/sof/mediatek/mt8186/mt8186.c b/sound/soc/sof/mediatek/mt8186/mt8186.c
index b69fa788b16f..e0d88e7aa8ca 100644
--- a/sound/soc/sof/mediatek/mt8186/mt8186.c
+++ b/sound/soc/sof/mediatek/mt8186/mt8186.c
@@ -597,6 +597,9 @@ static struct snd_sof_dsp_ops sof_mt8186_ops = {
 
 static struct snd_sof_of_mach sof_mt8186_machs[] = {
 	{
+		.compatible = "google,steelix",
+		.sof_tplg_filename = "sof-mt8186-google-steelix.tplg"
+	}, {
 		.compatible = "mediatek,mt8186",
 		.sof_tplg_filename = "sof-mt8186.tplg",
 	},
diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c
index 563fe6f7789f..77cc64ac7113 100644
--- a/sound/soc/sof/sof-audio.c
+++ b/sound/soc/sof/sof-audio.c
@@ -46,6 +46,7 @@ static int sof_widget_free_unlocked(struct snd_sof_dev *sdev,
 				    struct snd_sof_widget *swidget)
 {
 	const struct sof_ipc_tplg_ops *tplg_ops = sof_ipc_get_ops(sdev, tplg);
+	struct snd_sof_pipeline *spipe = swidget->spipe;
 	struct snd_sof_widget *pipe_widget;
 	int err = 0;
 	int ret;
@@ -87,15 +88,22 @@ static int sof_widget_free_unlocked(struct snd_sof_dev *sdev,
 	}
 
 	/*
-	 * disable widget core. continue to route setup status and complete flag
-	 * even if this fails and return the appropriate error
+	 * decrement ref count for cores associated with all modules in the pipeline and clear
+	 * the complete flag
 	 */
-	ret = snd_sof_dsp_core_put(sdev, swidget->core);
-	if (ret < 0) {
-		dev_err(sdev->dev, "error: failed to disable target core: %d for widget %s\n",
-			swidget->core, swidget->widget->name);
-		if (!err)
-			err = ret;
+	if (swidget->id == snd_soc_dapm_scheduler) {
+		int i;
+
+		for_each_set_bit(i, &spipe->core_mask, sdev->num_cores) {
+			ret = snd_sof_dsp_core_put(sdev, i);
+			if (ret < 0) {
+				dev_err(sdev->dev, "failed to disable target core: %d for pipeline %s\n",
+					i, swidget->widget->name);
+				if (!err)
+					err = ret;
+			}
+		}
+		swidget->spipe->complete = 0;
 	}
 
 	/*
@@ -108,10 +116,6 @@ static int sof_widget_free_unlocked(struct snd_sof_dev *sdev,
 			err = ret;
 	}
 
-	/* clear pipeline complete */
-	if (swidget->id == snd_soc_dapm_scheduler)
-		swidget->spipe->complete = 0;
-
 	if (!err)
 		dev_dbg(sdev->dev, "widget %s freed\n", swidget->widget->name);
 
@@ -134,8 +138,10 @@ static int sof_widget_setup_unlocked(struct snd_sof_dev *sdev,
 				     struct snd_sof_widget *swidget)
 {
 	const struct sof_ipc_tplg_ops *tplg_ops = sof_ipc_get_ops(sdev, tplg);
+	struct snd_sof_pipeline *spipe = swidget->spipe;
 	bool use_count_decremented = false;
 	int ret;
+	int i;
 
 	/* skip if there is no private data */
 	if (!swidget->private)
@@ -166,19 +172,23 @@ static int sof_widget_setup_unlocked(struct snd_sof_dev *sdev,
 			goto use_count_dec;
 	}
 
-	/* enable widget core */
-	ret = snd_sof_dsp_core_get(sdev, swidget->core);
-	if (ret < 0) {
-		dev_err(sdev->dev, "error: failed to enable target core for widget %s\n",
-			swidget->widget->name);
-		goto pipe_widget_free;
+	/* update ref count for cores associated with all modules in the pipeline */
+	if (swidget->id == snd_soc_dapm_scheduler) {
+		for_each_set_bit(i, &spipe->core_mask, sdev->num_cores) {
+			ret = snd_sof_dsp_core_get(sdev, i);
+			if (ret < 0) {
+				dev_err(sdev->dev, "failed to enable target core %d for pipeline %s\n",
+					i, swidget->widget->name);
+				goto pipe_widget_free;
+			}
+		}
 	}
 
 	/* setup widget in the DSP */
 	if (tplg_ops && tplg_ops->widget_setup) {
 		ret = tplg_ops->widget_setup(sdev, swidget);
 		if (ret < 0)
-			goto core_put;
+			goto pipe_widget_free;
 	}
 
 	/* send config for DAI components */
@@ -208,15 +218,22 @@ static int sof_widget_setup_unlocked(struct snd_sof_dev *sdev,
 	return 0;
 
 widget_free:
-	/* widget use_count and core ref_count will both be decremented by sof_widget_free() */
+	/* widget use_count will be decremented by sof_widget_free() */
 	sof_widget_free_unlocked(sdev, swidget);
 	use_count_decremented = true;
-core_put:
-	if (!use_count_decremented)
-		snd_sof_dsp_core_put(sdev, swidget->core);
 pipe_widget_free:
-	if (swidget->id != snd_soc_dapm_scheduler)
+	if (swidget->id != snd_soc_dapm_scheduler) {
 		sof_widget_free_unlocked(sdev, swidget->spipe->pipe_widget);
+	} else {
+		int j;
+
+		/* decrement ref count for all cores that were updated previously */
+		for_each_set_bit(j, &spipe->core_mask, sdev->num_cores) {
+			if (j >= i)
+				break;
+			snd_sof_dsp_core_put(sdev, j);
+		}
+	}
 use_count_dec:
 	if (!use_count_decremented)
 		swidget->use_count--;
diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h
index 5d5eeb1a1a6f..a6d6bcd00cee 100644
--- a/sound/soc/sof/sof-audio.h
+++ b/sound/soc/sof/sof-audio.h
@@ -480,6 +480,7 @@ struct snd_sof_widget {
  * @paused_count: Count of number of PCM's that have started and have currently paused this
 		  pipeline
  * @complete: flag used to indicate that pipeline set up is complete.
+ * @core_mask: Mask containing target cores for all modules in the pipeline
  * @list: List item in sdev pipeline_list
  */
 struct snd_sof_pipeline {
@@ -487,6 +488,7 @@ struct snd_sof_pipeline {
 	int started_count;
 	int paused_count;
 	int complete;
+	unsigned long core_mask;
 	struct list_head list;
 };
 
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index a3a3af252259..37ec671a2d76 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -1736,8 +1736,10 @@ static int sof_dai_load(struct snd_soc_component *scomp, int index,
 	/* perform pcm set op */
 	if (ipc_pcm_ops && ipc_pcm_ops->pcm_setup) {
 		ret = ipc_pcm_ops->pcm_setup(sdev, spcm);
-		if (ret < 0)
+		if (ret < 0) {
+			kfree(spcm);
 			return ret;
+		}
 	}
 
 	dai_drv->dobj.private = spcm;
diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index 898bc3baca7b..c8d48566e175 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -2978,6 +2978,7 @@ static int snd_bbfpro_controls_create(struct usb_mixer_interface *mixer)
 #define SND_DJM_850_IDX		0x2
 #define SND_DJM_900NXS2_IDX	0x3
 #define SND_DJM_750MK2_IDX	0x4
+#define SND_DJM_450_IDX		0x5
 
 
 #define SND_DJM_CTL(_name, suffix, _default_value, _windex) { \
@@ -3108,6 +3109,31 @@ static const struct snd_djm_ctl snd_djm_ctls_250mk2[] = {
 };
 
 
+// DJM-450
+static const u16 snd_djm_opts_450_cap1[] = {
+	0x0103, 0x0100, 0x0106, 0x0107, 0x0108, 0x0109, 0x010d, 0x010a };
+
+static const u16 snd_djm_opts_450_cap2[] = {
+	0x0203, 0x0200, 0x0206, 0x0207, 0x0208, 0x0209, 0x020d, 0x020a };
+
+static const u16 snd_djm_opts_450_cap3[] = {
+	0x030a, 0x0311, 0x0312, 0x0307, 0x0308, 0x0309, 0x030d };
+
+static const u16 snd_djm_opts_450_pb1[] = { 0x0100, 0x0101, 0x0104 };
+static const u16 snd_djm_opts_450_pb2[] = { 0x0200, 0x0201, 0x0204 };
+static const u16 snd_djm_opts_450_pb3[] = { 0x0300, 0x0301, 0x0304 };
+
+static const struct snd_djm_ctl snd_djm_ctls_450[] = {
+	SND_DJM_CTL("Capture Level", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
+	SND_DJM_CTL("Ch1 Input",   450_cap1, 2, SND_DJM_WINDEX_CAP),
+	SND_DJM_CTL("Ch2 Input",   450_cap2, 2, SND_DJM_WINDEX_CAP),
+	SND_DJM_CTL("Ch3 Input",   450_cap3, 0, SND_DJM_WINDEX_CAP),
+	SND_DJM_CTL("Ch1 Output",   450_pb1, 0, SND_DJM_WINDEX_PB),
+	SND_DJM_CTL("Ch2 Output",   450_pb2, 1, SND_DJM_WINDEX_PB),
+	SND_DJM_CTL("Ch3 Output",   450_pb3, 2, SND_DJM_WINDEX_PB)
+};
+
+
 // DJM-750
 static const u16 snd_djm_opts_750_cap1[] = {
 	0x0101, 0x0103, 0x0106, 0x0107, 0x0108, 0x0109, 0x010a, 0x010f };
@@ -3203,6 +3229,7 @@ static const struct snd_djm_device snd_djm_devices[] = {
 	[SND_DJM_850_IDX] = SND_DJM_DEVICE(850),
 	[SND_DJM_900NXS2_IDX] = SND_DJM_DEVICE(900nxs2),
 	[SND_DJM_750MK2_IDX] = SND_DJM_DEVICE(750mk2),
+	[SND_DJM_450_IDX] = SND_DJM_DEVICE(450),
 };
 
 
@@ -3454,6 +3481,9 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer)
 	case USB_ID(0x2b73, 0x0017): /* Pioneer DJ DJM-250MK2 */
 		err = snd_djm_controls_create(mixer, SND_DJM_250MK2_IDX);
 		break;
+	case USB_ID(0x2b73, 0x0013): /* Pioneer DJ DJM-450 */
+		err = snd_djm_controls_create(mixer, SND_DJM_450_IDX);
+		break;
 	case USB_ID(0x08e4, 0x017f): /* Pioneer DJ DJM-750 */
 		err = snd_djm_controls_create(mixer, SND_DJM_750_IDX);
 		break;
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 5f6f84837a49..7c7493cb571f 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -79,13 +79,15 @@
 #define ARM_CPU_PART_CORTEX_A78AE	0xD42
 #define ARM_CPU_PART_CORTEX_X1		0xD44
 #define ARM_CPU_PART_CORTEX_A510	0xD46
+#define ARM_CPU_PART_CORTEX_A520	0xD80
 #define ARM_CPU_PART_CORTEX_A710	0xD47
 #define ARM_CPU_PART_CORTEX_A715	0xD4D
 #define ARM_CPU_PART_CORTEX_X2		0xD48
 #define ARM_CPU_PART_NEOVERSE_N2	0xD49
 #define ARM_CPU_PART_CORTEX_A78C	0xD4B
 
-#define APM_CPU_PART_POTENZA		0x000
+#define APM_CPU_PART_XGENE		0x000
+#define APM_CPU_VAR_POTENZA		0x00
 
 #define CAVIUM_CPU_PART_THUNDERX	0x0A1
 #define CAVIUM_CPU_PART_THUNDERX_81XX	0x0A2
@@ -148,6 +150,7 @@
 #define MIDR_CORTEX_A78AE	MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
 #define MIDR_CORTEX_X1	MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
 #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
 #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
 #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
 #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index f7ddd73a8c0f..89d2fc872d9f 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -505,6 +505,38 @@ struct kvm_smccc_filter {
 #define KVM_HYPERCALL_EXIT_SMC		(1U << 0)
 #define KVM_HYPERCALL_EXIT_16BIT	(1U << 1)
 
+/*
+ * Get feature ID registers userspace writable mask.
+ *
+ * From DDI0487J.a, D19.2.66 ("ID_AA64MMFR2_EL1, AArch64 Memory Model
+ * Feature Register 2"):
+ *
+ * "The Feature ID space is defined as the System register space in
+ * AArch64 with op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7},
+ * op2=={0-7}."
+ *
+ * This covers all currently known R/O registers that indicate
+ * anything useful feature wise, including the ID registers.
+ *
+ * If we ever need to introduce a new range, it will be described as
+ * such in the range field.
+ */
+#define KVM_ARM_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2)		\
+	({								\
+		__u64 __op1 = (op1) & 3;				\
+		__op1 -= (__op1 == 3);					\
+		(__op1 << 6 | ((crm) & 7) << 3 | (op2));		\
+	})
+
+#define KVM_ARM_FEATURE_ID_RANGE	0
+#define KVM_ARM_FEATURE_ID_RANGE_SIZE	(3 * 8 * 8)
+
+struct reg_mask_range {
+	__u64 addr;		/* Pointer to mask array */
+	__u32 range;		/* Requested range */
+	__u32 reserved[13];
+};
+
 #endif
 
 #endif /* __ARM_KVM_H__ */
diff --git a/tools/arch/arm64/include/uapi/asm/perf_regs.h b/tools/arch/arm64/include/uapi/asm/perf_regs.h
index fd157f46727e..86e556429e0e 100644
--- a/tools/arch/arm64/include/uapi/asm/perf_regs.h
+++ b/tools/arch/arm64/include/uapi/asm/perf_regs.h
@@ -36,11 +36,13 @@ enum perf_event_arm_regs {
 	PERF_REG_ARM64_LR,
 	PERF_REG_ARM64_SP,
 	PERF_REG_ARM64_PC,
+	PERF_REG_ARM64_MAX,
 
 	/* Extended/pseudo registers */
-	PERF_REG_ARM64_VG = 46, // SVE Vector Granule
-
-	PERF_REG_ARM64_MAX = PERF_REG_ARM64_PC + 1,
-	PERF_REG_ARM64_EXTENDED_MAX = PERF_REG_ARM64_VG + 1
+	PERF_REG_ARM64_VG = 46,				/* SVE Vector Granule */
+	PERF_REG_ARM64_EXTENDED_MAX
 };
+
+#define PERF_REG_EXTENDED_MASK	(1ULL << PERF_REG_ARM64_VG)
+
 #endif /* _ASM_ARM64_PERF_REGS_H */
diff --git a/tools/arch/arm64/tools/Makefile b/tools/arch/arm64/tools/Makefile
index 7f64b8bb5107..7b42feedf647 100644
--- a/tools/arch/arm64/tools/Makefile
+++ b/tools/arch/arm64/tools/Makefile
@@ -22,7 +22,7 @@ endif
 arm64_tools_dir = $(top_srcdir)/arch/arm64/tools
 arm64_sysreg_tbl = $(arm64_tools_dir)/sysreg
 arm64_gen_sysreg = $(arm64_tools_dir)/gen-sysreg.awk
-arm64_generated_dir = $(top_srcdir)/tools/arch/arm64/include/generated
+arm64_generated_dir = $(OUTPUT)arch/arm64/include/generated
 arm64_sysreg_defs = $(arm64_generated_dir)/asm/sysreg-defs.h
 
 all: $(arm64_sysreg_defs)
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index a73cf01a1606..abe926d43cbe 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc {
 	__u8 reserved[1728];
 };
 
+#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST	6
+#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST	7
+
+#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS	64
+struct kvm_s390_vm_cpu_uv_feat {
+	union {
+		struct {
+			__u64 : 4;
+			__u64 ap : 1;		/* bit 4 */
+			__u64 ap_intr : 1;	/* bit 5 */
+			__u64 : 58;
+		};
+		__u64 feat;
+	};
+};
+
 /* kvm attributes for crypto */
 #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW	0
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 798e60b5454b..4af140cf5719 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -198,7 +198,6 @@
 #define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
-#define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_XCOMPACTED		( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
@@ -308,6 +307,11 @@
 #define X86_FEATURE_MSR_TSX_CTRL	(11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
 #define X86_FEATURE_SMBA		(11*32+21) /* "" Slow Memory Bandwidth Allocation */
 #define X86_FEATURE_BMEC		(11*32+22) /* "" Bandwidth Monitoring Event Configuration */
+#define X86_FEATURE_USER_SHSTK		(11*32+23) /* Shadow stack support for user mode applications */
+
+#define X86_FEATURE_SRSO		(11*32+24) /* "" AMD BTB untrain RETs */
+#define X86_FEATURE_SRSO_ALIAS		(11*32+25) /* "" AMD BTB untrain RETs through aliasing */
+#define X86_FEATURE_IBPB_ON_VMEXIT	(11*32+26) /* "" Issue an IBPB only on VMEXIT */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
@@ -380,6 +384,7 @@
 #define X86_FEATURE_OSPKE		(16*32+ 4) /* OS Protection Keys Enable */
 #define X86_FEATURE_WAITPKG		(16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */
 #define X86_FEATURE_AVX512_VBMI2	(16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_SHSTK		(16*32+ 7) /* "" Shadow stack */
 #define X86_FEATURE_GFNI		(16*32+ 8) /* Galois Field New Instructions */
 #define X86_FEATURE_VAES		(16*32+ 9) /* Vector AES */
 #define X86_FEATURE_VPCLMULQDQ		(16*32+10) /* Carry-Less Multiplication Double Quadword */
@@ -438,11 +443,16 @@
 
 /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
 #define X86_FEATURE_NO_NESTED_DATA_BP	(20*32+ 0) /* "" No Nested Data Breakpoints */
+#define X86_FEATURE_WRMSR_XX_BASE_NS	(20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
 #define X86_FEATURE_LFENCE_RDTSC	(20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */
 #define X86_FEATURE_NULL_SEL_CLR_BASE	(20*32+ 6) /* "" Null Selector Clears Base */
 #define X86_FEATURE_AUTOIBRS		(20*32+ 8) /* "" Automatic IBRS */
 #define X86_FEATURE_NO_SMM_CTL_MSR	(20*32+ 9) /* "" SMM_CTL MSR is not present */
 
+#define X86_FEATURE_SBPB		(20*32+27) /* "" Selective Branch Prediction Barrier */
+#define X86_FEATURE_IBPB_BRTYPE		(20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
+#define X86_FEATURE_SRSO_NO		(20*32+29) /* "" CPU is not affected by SRSO */
+
 /*
  * BUG word(s)
  */
@@ -484,5 +494,9 @@
 #define X86_BUG_RETBLEED		X86_BUG(27) /* CPU is affected by RETBleed */
 #define X86_BUG_EIBRS_PBRSB		X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
 #define X86_BUG_SMT_RSB			X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
+#define X86_BUG_GDS			X86_BUG(30) /* CPU is affected by Gather Data Sampling */
 
+/* BUG word 2 */
+#define X86_BUG_SRSO			X86_BUG(1*32 + 0) /* AMD SRSO bug */
+#define X86_BUG_DIV0			X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index fafe9be7a6f4..702d93fdd10e 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -105,6 +105,18 @@
 # define DISABLE_TDX_GUEST	(1 << (X86_FEATURE_TDX_GUEST & 31))
 #endif
 
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+#define DISABLE_USER_SHSTK	0
+#else
+#define DISABLE_USER_SHSTK	(1 << (X86_FEATURE_USER_SHSTK & 31))
+#endif
+
+#ifdef CONFIG_X86_KERNEL_IBT
+#define DISABLE_IBT	0
+#else
+#define DISABLE_IBT	(1 << (X86_FEATURE_IBT & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -120,7 +132,7 @@
 #define DISABLED_MASK9	(DISABLE_SGX)
 #define DISABLED_MASK10	0
 #define DISABLED_MASK11	(DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
-			 DISABLE_CALL_DEPTH_TRACKING)
+			 DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
 #define DISABLED_MASK12	(DISABLE_LAM)
 #define DISABLED_MASK13	0
 #define DISABLED_MASK14	0
@@ -128,7 +140,7 @@
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
 			 DISABLE_ENQCMD)
 #define DISABLED_MASK17	0
-#define DISABLED_MASK18	0
+#define DISABLED_MASK18	(DISABLE_IBT)
 #define DISABLED_MASK19	0
 #define DISABLED_MASK20	0
 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 1d111350197f..1d51e1850ed0 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -222,6 +222,7 @@
 #define MSR_INTEGRITY_CAPS_ARRAY_BIST          BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT)
 #define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT	4
 #define MSR_INTEGRITY_CAPS_PERIODIC_BIST	BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)
+#define MSR_INTEGRITY_CAPS_SAF_GEN_MASK	GENMASK_ULL(10, 9)
 
 #define MSR_LBR_NHM_FROM		0x00000680
 #define MSR_LBR_NHM_TO			0x000006c0
@@ -553,6 +554,7 @@
 #define MSR_AMD64_CPUID_FN_1		0xc0011004
 #define MSR_AMD64_LS_CFG		0xc0011020
 #define MSR_AMD64_DC_CFG		0xc0011022
+#define MSR_AMD64_TW_CFG		0xc0011023
 
 #define MSR_AMD64_DE_CFG		0xc0011029
 #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT	 1
@@ -637,12 +639,21 @@
 /* AMD Last Branch Record MSRs */
 #define MSR_AMD64_LBR_SELECT			0xc000010e
 
+/* Zen4 */
+#define MSR_ZEN4_BP_CFG                 0xc001102e
+#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
+
+/* Fam 19h MSRs */
+#define MSR_F19H_UMC_PERF_CTL           0xc0010800
+#define MSR_F19H_UMC_PERF_CTR           0xc0010801
+
+/* Zen 2 */
+#define MSR_ZEN2_SPECTRAL_CHICKEN       0xc00110e3
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT   BIT_ULL(1)
+
 /* Fam 17h MSRs */
 #define MSR_F17H_IRPERF			0xc00000e9
 
-#define MSR_ZEN2_SPECTRAL_CHICKEN	0xc00110e3
-#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT	BIT_ULL(1)
-
 /* Fam 16h MSRs */
 #define MSR_F16H_L2I_PERF_CTL		0xc0010230
 #define MSR_F16H_L2I_PERF_CTR		0xc0010231
@@ -1112,12 +1123,16 @@
 #define MSR_IA32_VMX_MISC_INTEL_PT                 (1ULL << 14)
 #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
 #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
-/* AMD-V MSRs */
 
+/* AMD-V MSRs */
 #define MSR_VM_CR                       0xc0010114
 #define MSR_VM_IGNNE                    0xc0010115
 #define MSR_VM_HSAVE_PA                 0xc0010117
 
+#define SVM_VM_CR_VALID_MASK		0x001fULL
+#define SVM_VM_CR_SVM_LOCK_MASK		0x0008ULL
+#define SVM_VM_CR_SVM_DIS_MASK		0x0010ULL
+
 /* Hardware Feedback Interface */
 #define MSR_IA32_HW_FEEDBACK_PTR        0x17d0
 #define MSR_IA32_HW_FEEDBACK_CONFIG     0x17d1
diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h
index e8d7ebbca1a4..384e2cc6ac19 100644
--- a/tools/arch/x86/include/uapi/asm/prctl.h
+++ b/tools/arch/x86/include/uapi/asm/prctl.h
@@ -23,9 +23,21 @@
 #define ARCH_MAP_VDSO_32		0x2002
 #define ARCH_MAP_VDSO_64		0x2003
 
+/* Don't use 0x3001-0x3004 because of old glibcs */
+
 #define ARCH_GET_UNTAG_MASK		0x4001
 #define ARCH_ENABLE_TAGGED_ADDR		0x4002
 #define ARCH_GET_MAX_TAG_BITS		0x4003
 #define ARCH_FORCE_TAGGED_SVA		0x4004
 
+#define ARCH_SHSTK_ENABLE		0x5001
+#define ARCH_SHSTK_DISABLE		0x5002
+#define ARCH_SHSTK_LOCK			0x5003
+#define ARCH_SHSTK_UNLOCK		0x5004
+#define ARCH_SHSTK_STATUS		0x5005
+
+/* ARCH_SHSTK_ features bits */
+#define ARCH_SHSTK_SHSTK		(1ULL <<  0)
+#define ARCH_SHSTK_WRSS			(1ULL <<  1)
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/asm-generic/unaligned.h
index 156743d399ae..2fd551915c20 100644
--- a/tools/include/asm-generic/unaligned.h
+++ b/tools/include/asm-generic/unaligned.h
@@ -8,6 +8,7 @@
  */
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
 
 #define __get_unaligned_t(type, ptr) ({						\
 	const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr);	\
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 76d946445391..756b013fb832 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -816,15 +816,21 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
 __SYSCALL(__NR_futex_waitv, sys_futex_waitv)
 #define __NR_set_mempolicy_home_node 450
 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
-
 #define __NR_cachestat 451
 __SYSCALL(__NR_cachestat, sys_cachestat)
-
 #define __NR_fchmodat2 452
 __SYSCALL(__NR_fchmodat2, sys_fchmodat2)
+#define __NR_map_shadow_stack 453
+__SYSCALL(__NR_map_shadow_stack, sys_map_shadow_stack)
+#define __NR_futex_wake 454
+__SYSCALL(__NR_futex_wake, sys_futex_wake)
+#define __NR_futex_wait 455
+__SYSCALL(__NR_futex_wait, sys_futex_wait)
+#define __NR_futex_requeue 456
+__SYSCALL(__NR_futex_requeue, sys_futex_requeue)
 
 #undef __NR_syscalls
-#define __NR_syscalls 453
+#define __NR_syscalls 457
 
 /*
  * 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 794c1d857677..de723566c5ae 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -1134,6 +1134,26 @@ extern "C" {
 #define DRM_IOCTL_MODE_PAGE_FLIP	DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip)
 #define DRM_IOCTL_MODE_DIRTYFB		DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd)
 
+/**
+ * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object.
+ *
+ * KMS dumb buffers provide a very primitive way to allocate a buffer object
+ * suitable for scanout and map it for software rendering. KMS dumb buffers are
+ * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb
+ * buffers are not suitable to be displayed on any other device than the KMS
+ * device where they were allocated from. Also see
+ * :ref:`kms_dumb_buffer_objects`.
+ *
+ * The IOCTL argument is a struct drm_mode_create_dumb.
+ *
+ * User-space is expected to create a KMS dumb buffer via this IOCTL, then add
+ * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via
+ * &DRM_IOCTL_MODE_MAP_DUMB.
+ *
+ * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported.
+ * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate
+ * driver preferences for dumb buffers.
+ */
 #define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb)
 #define DRM_IOCTL_MODE_MAP_DUMB    DRM_IOWR(0xB3, struct drm_mode_map_dumb)
 #define DRM_IOCTL_MODE_DESTROY_DUMB    DRM_IOWR(0xB4, struct drm_mode_destroy_dumb)
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 7000e5910a1d..218edb0a96f8 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -38,13 +38,13 @@ extern "C" {
  */
 
 /**
- * DOC: uevents generated by i915 on it's device node
+ * DOC: uevents generated by i915 on its device node
  *
  * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch
- *	event from the gpu l3 cache. Additional information supplied is ROW,
+ *	event from the GPU L3 cache. Additional information supplied is ROW,
  *	BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep
- *	track of these events and if a specific cache-line seems to have a
- *	persistent error remap it with the l3 remapping tool supplied in
+ *	track of these events, and if a specific cache-line seems to have a
+ *	persistent error, remap it with the L3 remapping tool supplied in
  *	intel-gpu-tools.  The value supplied with the event is always 1.
  *
  * I915_ERROR_UEVENT - Generated upon error detection, currently only via
diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h
index fd1fb0d5389d..7a8f4c290187 100644
--- a/tools/include/uapi/linux/fscrypt.h
+++ b/tools/include/uapi/linux/fscrypt.h
@@ -71,7 +71,8 @@ struct fscrypt_policy_v2 {
 	__u8 contents_encryption_mode;
 	__u8 filenames_encryption_mode;
 	__u8 flags;
-	__u8 __reserved[4];
+	__u8 log2_data_unit_size;
+	__u8 __reserved[3];
 	__u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
 };
 
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index f089ab290978..211b86de35ac 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -264,6 +264,7 @@ struct kvm_xen_exit {
 #define KVM_EXIT_RISCV_SBI        35
 #define KVM_EXIT_RISCV_CSR        36
 #define KVM_EXIT_NOTIFY           37
+#define KVM_EXIT_LOONGARCH_IOCSR  38
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -336,6 +337,13 @@ struct kvm_run {
 			__u32 len;
 			__u8  is_write;
 		} mmio;
+		/* KVM_EXIT_LOONGARCH_IOCSR */
+		struct {
+			__u64 phys_addr;
+			__u8  data[8];
+			__u32 len;
+			__u8  is_write;
+		} iocsr_io;
 		/* KVM_EXIT_HYPERCALL */
 		struct {
 			__u64 nr;
@@ -1192,6 +1200,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_COUNTER_OFFSET 227
 #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
+#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1362,6 +1371,7 @@ struct kvm_dirty_tlb {
 #define KVM_REG_ARM64		0x6000000000000000ULL
 #define KVM_REG_MIPS		0x7000000000000000ULL
 #define KVM_REG_RISCV		0x8000000000000000ULL
+#define KVM_REG_LOONGARCH	0x9000000000000000ULL
 
 #define KVM_REG_SIZE_SHIFT	52
 #define KVM_REG_SIZE_MASK	0x00f0000000000000ULL
@@ -1418,9 +1428,16 @@ struct kvm_device_attr {
 	__u64	addr;		/* userspace address of attr data */
 };
 
-#define  KVM_DEV_VFIO_GROUP			1
-#define   KVM_DEV_VFIO_GROUP_ADD			1
-#define   KVM_DEV_VFIO_GROUP_DEL			2
+#define  KVM_DEV_VFIO_FILE			1
+
+#define   KVM_DEV_VFIO_FILE_ADD			1
+#define   KVM_DEV_VFIO_FILE_DEL			2
+
+/* KVM_DEV_VFIO_GROUP aliases are for compile time uapi compatibility */
+#define  KVM_DEV_VFIO_GROUP	KVM_DEV_VFIO_FILE
+
+#define   KVM_DEV_VFIO_GROUP_ADD	KVM_DEV_VFIO_FILE_ADD
+#define   KVM_DEV_VFIO_GROUP_DEL	KVM_DEV_VFIO_FILE_DEL
 #define   KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE		3
 
 enum kvm_device_type {
@@ -1555,6 +1572,7 @@ struct kvm_s390_ucas_mapping {
 #define KVM_ARM_MTE_COPY_TAGS	  _IOR(KVMIO,  0xb4, struct kvm_arm_copy_mte_tags)
 /* Available with KVM_CAP_COUNTER_OFFSET */
 #define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO,  0xb5, struct kvm_arm_counter_offset)
+#define KVM_ARM_GET_REG_WRITABLE_MASKS _IOR(KVMIO,  0xb6, struct reg_mask_range)
 
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index 8eb0d7b758d2..bb242fdcfe6b 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -100,8 +100,9 @@ enum fsconfig_command {
 	FSCONFIG_SET_PATH	= 3,	/* Set parameter, supplying an object by path */
 	FSCONFIG_SET_PATH_EMPTY	= 4,	/* Set parameter, supplying an object by (empty) path */
 	FSCONFIG_SET_FD		= 5,	/* Set parameter, supplying an object by fd */
-	FSCONFIG_CMD_CREATE	= 6,	/* Invoke superblock creation */
+	FSCONFIG_CMD_CREATE	= 6,	/* Create new or reuse existing superblock */
 	FSCONFIG_CMD_RECONFIGURE = 7,	/* Invoke superblock reconfiguration */
+	FSCONFIG_CMD_CREATE_EXCL = 8,	/* Create new superblock, fail if reusing existing superblock */
 };
 
 /*
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index f5c48b61ab62..649560c685f1 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -219,4 +219,12 @@
  */
 #define VHOST_VDPA_RESUME		_IO(VHOST_VIRTIO, 0x7E)
 
+/* Get the group for the descriptor table including driver & device areas
+ * of a virtqueue: read index, write group in num.
+ * The virtqueue index is stored in the index field of vhost_vring_state.
+ * The group ID of the descriptor table for this specific virtqueue
+ * is returned via num field of vhost_vring_state.
+ */
+#define VHOST_VDPA_GET_VRING_DESC_GROUP	_IOWR(VHOST_VIRTIO, 0x7F,	\
+					      struct vhost_vring_state)
 #endif
diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c
index c12ca87ca2bb..8e757e249dab 100644
--- a/tools/net/ynl/generated/devlink-user.c
+++ b/tools/net/ynl/generated/devlink-user.c
@@ -2399,6 +2399,7 @@ void devlink_port_set_req_free(struct devlink_port_set_req *req)
 
 int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -2416,7 +2417,7 @@ int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req)
 	if (req->_present.port_function)
 		devlink_dl_port_function_put(nlh, DEVLINK_ATTR_PORT_FUNCTION, &req->port_function);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -2537,6 +2538,7 @@ void devlink_port_del_req_free(struct devlink_port_del_req *req)
 
 int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -2550,7 +2552,7 @@ int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req)
 	if (req->_present.port_index)
 		mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -2568,6 +2570,7 @@ void devlink_port_split_req_free(struct devlink_port_split_req *req)
 
 int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -2583,7 +2586,7 @@ int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req)
 	if (req->_present.port_split_count)
 		mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_SPLIT_COUNT, req->port_split_count);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -2602,6 +2605,7 @@ void devlink_port_unsplit_req_free(struct devlink_port_unsplit_req *req)
 int devlink_port_unsplit(struct ynl_sock *ys,
 			 struct devlink_port_unsplit_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -2615,7 +2619,7 @@ int devlink_port_unsplit(struct ynl_sock *ys,
 	if (req->_present.port_index)
 		mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -2926,6 +2930,7 @@ void devlink_sb_pool_set_req_free(struct devlink_sb_pool_set_req *req)
 int devlink_sb_pool_set(struct ynl_sock *ys,
 			struct devlink_sb_pool_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -2945,7 +2950,7 @@ int devlink_sb_pool_set(struct ynl_sock *ys,
 	if (req->_present.sb_pool_size)
 		mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_POOL_SIZE, req->sb_pool_size);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -3126,6 +3131,7 @@ devlink_sb_port_pool_set_req_free(struct devlink_sb_port_pool_set_req *req)
 int devlink_sb_port_pool_set(struct ynl_sock *ys,
 			     struct devlink_sb_port_pool_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -3145,7 +3151,7 @@ int devlink_sb_port_pool_set(struct ynl_sock *ys,
 	if (req->_present.sb_threshold)
 		mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -3334,6 +3340,7 @@ devlink_sb_tc_pool_bind_set_req_free(struct devlink_sb_tc_pool_bind_set_req *req
 int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys,
 				struct devlink_sb_tc_pool_bind_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -3357,7 +3364,7 @@ int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys,
 	if (req->_present.sb_threshold)
 		mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -3376,6 +3383,7 @@ void devlink_sb_occ_snapshot_req_free(struct devlink_sb_occ_snapshot_req *req)
 int devlink_sb_occ_snapshot(struct ynl_sock *ys,
 			    struct devlink_sb_occ_snapshot_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -3389,7 +3397,7 @@ int devlink_sb_occ_snapshot(struct ynl_sock *ys,
 	if (req->_present.sb_index)
 		mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -3409,6 +3417,7 @@ devlink_sb_occ_max_clear_req_free(struct devlink_sb_occ_max_clear_req *req)
 int devlink_sb_occ_max_clear(struct ynl_sock *ys,
 			     struct devlink_sb_occ_max_clear_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -3422,7 +3431,7 @@ int devlink_sb_occ_max_clear(struct ynl_sock *ys,
 	if (req->_present.sb_index)
 		mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -3544,6 +3553,7 @@ void devlink_eswitch_set_req_free(struct devlink_eswitch_set_req *req)
 int devlink_eswitch_set(struct ynl_sock *ys,
 			struct devlink_eswitch_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -3561,7 +3571,7 @@ int devlink_eswitch_set(struct ynl_sock *ys,
 	if (req->_present.eswitch_encap_mode)
 		mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, req->eswitch_encap_mode);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -3895,6 +3905,7 @@ devlink_dpipe_table_counters_set_req_free(struct devlink_dpipe_table_counters_se
 int devlink_dpipe_table_counters_set(struct ynl_sock *ys,
 				     struct devlink_dpipe_table_counters_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -3910,7 +3921,7 @@ int devlink_dpipe_table_counters_set(struct ynl_sock *ys,
 	if (req->_present.dpipe_table_counters_enabled)
 		mnl_attr_put_u8(nlh, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, req->dpipe_table_counters_enabled);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -3929,6 +3940,7 @@ void devlink_resource_set_req_free(struct devlink_resource_set_req *req)
 int devlink_resource_set(struct ynl_sock *ys,
 			 struct devlink_resource_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -3944,7 +3956,7 @@ int devlink_resource_set(struct ynl_sock *ys,
 	if (req->_present.resource_size)
 		mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_SIZE, req->resource_size);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -4319,6 +4331,7 @@ void devlink_param_set_req_free(struct devlink_param_set_req *req)
 
 int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -4336,7 +4349,7 @@ int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req)
 	if (req->_present.param_value_cmode)
 		mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, req->param_value_cmode);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -4631,6 +4644,7 @@ void devlink_region_del_req_free(struct devlink_region_del_req *req)
 
 int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -4648,7 +4662,7 @@ int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req)
 	if (req->_present.region_snapshot_id)
 		mnl_attr_put_u32(nlh, DEVLINK_ATTR_REGION_SNAPSHOT_ID, req->region_snapshot_id);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -4922,6 +4936,7 @@ void devlink_port_param_set_req_free(struct devlink_port_param_set_req *req)
 int devlink_port_param_set(struct ynl_sock *ys,
 			   struct devlink_port_param_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -4935,7 +4950,7 @@ int devlink_port_param_set(struct ynl_sock *ys,
 	if (req->_present.port_index)
 		mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -5360,6 +5375,7 @@ devlink_health_reporter_set_req_free(struct devlink_health_reporter_set_req *req
 int devlink_health_reporter_set(struct ynl_sock *ys,
 				struct devlink_health_reporter_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -5381,7 +5397,7 @@ int devlink_health_reporter_set(struct ynl_sock *ys,
 	if (req->_present.health_reporter_auto_dump)
 		mnl_attr_put_u8(nlh, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, req->health_reporter_auto_dump);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -5402,6 +5418,7 @@ devlink_health_reporter_recover_req_free(struct devlink_health_reporter_recover_
 int devlink_health_reporter_recover(struct ynl_sock *ys,
 				    struct devlink_health_reporter_recover_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -5417,7 +5434,7 @@ int devlink_health_reporter_recover(struct ynl_sock *ys,
 	if (req->_present.health_reporter_name_len)
 		mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -5438,6 +5455,7 @@ devlink_health_reporter_diagnose_req_free(struct devlink_health_reporter_diagnos
 int devlink_health_reporter_diagnose(struct ynl_sock *ys,
 				     struct devlink_health_reporter_diagnose_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -5453,7 +5471,7 @@ int devlink_health_reporter_diagnose(struct ynl_sock *ys,
 	if (req->_present.health_reporter_name_len)
 		mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -5556,6 +5574,7 @@ devlink_health_reporter_dump_clear_req_free(struct devlink_health_reporter_dump_
 int devlink_health_reporter_dump_clear(struct ynl_sock *ys,
 				       struct devlink_health_reporter_dump_clear_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -5571,7 +5590,7 @@ int devlink_health_reporter_dump_clear(struct ynl_sock *ys,
 	if (req->_present.health_reporter_name_len)
 		mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -5592,6 +5611,7 @@ void devlink_flash_update_req_free(struct devlink_flash_update_req *req)
 int devlink_flash_update(struct ynl_sock *ys,
 			 struct devlink_flash_update_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -5609,7 +5629,7 @@ int devlink_flash_update(struct ynl_sock *ys,
 	if (req->_present.flash_update_overwrite_mask)
 		mnl_attr_put(nlh, DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, sizeof(struct nla_bitfield32), &req->flash_update_overwrite_mask);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -5780,6 +5800,7 @@ void devlink_trap_set_req_free(struct devlink_trap_set_req *req)
 
 int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -5795,7 +5816,7 @@ int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req)
 	if (req->_present.trap_action)
 		mnl_attr_put_u8(nlh, DEVLINK_ATTR_TRAP_ACTION, req->trap_action);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -5968,6 +5989,7 @@ void devlink_trap_group_set_req_free(struct devlink_trap_group_set_req *req)
 int devlink_trap_group_set(struct ynl_sock *ys,
 			   struct devlink_trap_group_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -5985,7 +6007,7 @@ int devlink_trap_group_set(struct ynl_sock *ys,
 	if (req->_present.trap_policer_id)
 		mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, req->trap_policer_id);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -6152,6 +6174,7 @@ devlink_trap_policer_set_req_free(struct devlink_trap_policer_set_req *req)
 int devlink_trap_policer_set(struct ynl_sock *ys,
 			     struct devlink_trap_policer_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -6169,7 +6192,7 @@ int devlink_trap_policer_set(struct ynl_sock *ys,
 	if (req->_present.trap_policer_burst)
 		mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_BURST, req->trap_policer_burst);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -6190,6 +6213,7 @@ devlink_health_reporter_test_req_free(struct devlink_health_reporter_test_req *r
 int devlink_health_reporter_test(struct ynl_sock *ys,
 				 struct devlink_health_reporter_test_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -6205,7 +6229,7 @@ int devlink_health_reporter_test(struct ynl_sock *ys,
 	if (req->_present.health_reporter_name_len)
 		mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -6384,6 +6408,7 @@ void devlink_rate_set_req_free(struct devlink_rate_set_req *req)
 
 int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -6407,7 +6432,7 @@ int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req)
 	if (req->_present.rate_parent_node_name_len)
 		mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -6427,6 +6452,7 @@ void devlink_rate_new_req_free(struct devlink_rate_new_req *req)
 
 int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -6450,7 +6476,7 @@ int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req)
 	if (req->_present.rate_parent_node_name_len)
 		mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -6469,6 +6495,7 @@ void devlink_rate_del_req_free(struct devlink_rate_del_req *req)
 
 int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -6482,7 +6509,7 @@ int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req)
 	if (req->_present.rate_node_name_len)
 		mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -6645,6 +6672,7 @@ void devlink_linecard_set_req_free(struct devlink_linecard_set_req *req)
 int devlink_linecard_set(struct ynl_sock *ys,
 			 struct devlink_linecard_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -6660,7 +6688,7 @@ int devlink_linecard_set(struct ynl_sock *ys,
 	if (req->_present.linecard_type_len)
 		mnl_attr_put_strz(nlh, DEVLINK_ATTR_LINECARD_TYPE, req->linecard_type);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -6810,6 +6838,7 @@ void devlink_selftests_run_req_free(struct devlink_selftests_run_req *req)
 int devlink_selftests_run(struct ynl_sock *ys,
 			  struct devlink_selftests_run_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -6823,7 +6852,7 @@ int devlink_selftests_run(struct ynl_sock *ys,
 	if (req->_present.selftests)
 		devlink_dl_selftest_id_put(nlh, DEVLINK_ATTR_SELFTESTS, &req->selftests);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
diff --git a/tools/net/ynl/generated/ethtool-user.c b/tools/net/ynl/generated/ethtool-user.c
index 74b883a14958..660435639e2b 100644
--- a/tools/net/ynl/generated/ethtool-user.c
+++ b/tools/net/ynl/generated/ethtool-user.c
@@ -1843,6 +1843,7 @@ void ethtool_linkinfo_set_req_free(struct ethtool_linkinfo_set_req *req)
 int ethtool_linkinfo_set(struct ynl_sock *ys,
 			 struct ethtool_linkinfo_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -1862,7 +1863,7 @@ int ethtool_linkinfo_set(struct ynl_sock *ys,
 	if (req->_present.transceiver)
 		mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_TRANSCEIVER, req->transceiver);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -2067,6 +2068,7 @@ void ethtool_linkmodes_set_req_free(struct ethtool_linkmodes_set_req *req)
 int ethtool_linkmodes_set(struct ynl_sock *ys,
 			  struct ethtool_linkmodes_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -2094,7 +2096,7 @@ int ethtool_linkmodes_set(struct ynl_sock *ys,
 	if (req->_present.rate_matching)
 		mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_RATE_MATCHING, req->rate_matching);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -2398,6 +2400,7 @@ void ethtool_debug_set_req_free(struct ethtool_debug_set_req *req)
 
 int ethtool_debug_set(struct ynl_sock *ys, struct ethtool_debug_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -2409,7 +2412,7 @@ int ethtool_debug_set(struct ynl_sock *ys, struct ethtool_debug_set_req *req)
 	if (req->_present.msgmask)
 		ethtool_bitset_put(nlh, ETHTOOL_A_DEBUG_MSGMASK, &req->msgmask);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -2577,6 +2580,7 @@ void ethtool_wol_set_req_free(struct ethtool_wol_set_req *req)
 
 int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -2590,7 +2594,7 @@ int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req)
 	if (req->_present.sopass_len)
 		mnl_attr_put(nlh, ETHTOOL_A_WOL_SOPASS, req->_present.sopass_len, req->sopass);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -3045,6 +3049,7 @@ void ethtool_privflags_set_req_free(struct ethtool_privflags_set_req *req)
 int ethtool_privflags_set(struct ynl_sock *ys,
 			  struct ethtool_privflags_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -3056,7 +3061,7 @@ int ethtool_privflags_set(struct ynl_sock *ys,
 	if (req->_present.flags)
 		ethtool_bitset_put(nlh, ETHTOOL_A_PRIVFLAGS_FLAGS, &req->flags);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -3273,6 +3278,7 @@ void ethtool_rings_set_req_free(struct ethtool_rings_set_req *req)
 
 int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -3312,7 +3318,7 @@ int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req)
 	if (req->_present.tx_push_buf_len_max)
 		mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, req->tx_push_buf_len_max);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -3495,6 +3501,7 @@ void ethtool_channels_set_req_free(struct ethtool_channels_set_req *req)
 int ethtool_channels_set(struct ynl_sock *ys,
 			 struct ethtool_channels_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -3520,7 +3527,7 @@ int ethtool_channels_set(struct ynl_sock *ys,
 	if (req->_present.combined_count)
 		mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_COMBINED_COUNT, req->combined_count);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -3798,6 +3805,7 @@ void ethtool_coalesce_set_req_free(struct ethtool_coalesce_set_req *req)
 int ethtool_coalesce_set(struct ynl_sock *ys,
 			 struct ethtool_coalesce_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -3861,7 +3869,7 @@ int ethtool_coalesce_set(struct ynl_sock *ys,
 	if (req->_present.tx_aggr_time_usecs)
 		mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, req->tx_aggr_time_usecs);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -4036,6 +4044,7 @@ void ethtool_pause_set_req_free(struct ethtool_pause_set_req *req)
 
 int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -4055,7 +4064,7 @@ int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req)
 	if (req->_present.stats_src)
 		mnl_attr_put_u32(nlh, ETHTOOL_A_PAUSE_STATS_SRC, req->stats_src);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -4242,6 +4251,7 @@ void ethtool_eee_set_req_free(struct ethtool_eee_set_req *req)
 
 int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -4263,7 +4273,7 @@ int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req)
 	if (req->_present.tx_lpi_timer)
 		mnl_attr_put_u32(nlh, ETHTOOL_A_EEE_TX_LPI_TIMER, req->tx_lpi_timer);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -4437,6 +4447,7 @@ void ethtool_cable_test_act_req_free(struct ethtool_cable_test_act_req *req)
 int ethtool_cable_test_act(struct ynl_sock *ys,
 			   struct ethtool_cable_test_act_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -4446,7 +4457,7 @@ int ethtool_cable_test_act(struct ynl_sock *ys,
 	if (req->_present.header)
 		ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_HEADER, &req->header);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -4465,6 +4476,7 @@ ethtool_cable_test_tdr_act_req_free(struct ethtool_cable_test_tdr_act_req *req)
 int ethtool_cable_test_tdr_act(struct ynl_sock *ys,
 			       struct ethtool_cable_test_tdr_act_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -4474,7 +4486,7 @@ int ethtool_cable_test_tdr_act(struct ynl_sock *ys,
 	if (req->_present.header)
 		ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_TDR_HEADER, &req->header);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -4782,6 +4794,7 @@ void ethtool_fec_set_req_free(struct ethtool_fec_set_req *req)
 
 int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -4799,7 +4812,7 @@ int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req)
 	if (req->_present.stats)
 		ethtool_fec_stat_put(nlh, ETHTOOL_A_FEC_STATS, &req->stats);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -5235,6 +5248,7 @@ void ethtool_module_set_req_free(struct ethtool_module_set_req *req)
 
 int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -5248,7 +5262,7 @@ int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req)
 	if (req->_present.power_mode)
 		mnl_attr_put_u8(nlh, ETHTOOL_A_MODULE_POWER_MODE, req->power_mode);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -5397,6 +5411,7 @@ void ethtool_pse_set_req_free(struct ethtool_pse_set_req *req)
 
 int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -5412,7 +5427,7 @@ int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req)
 	if (req->_present.pw_d_status)
 		mnl_attr_put_u32(nlh, ETHTOOL_A_PODL_PSE_PW_D_STATUS, req->pw_d_status);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -5746,6 +5761,7 @@ void ethtool_plca_set_cfg_req_free(struct ethtool_plca_set_cfg_req *req)
 int ethtool_plca_set_cfg(struct ynl_sock *ys,
 			 struct ethtool_plca_set_cfg_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -5771,7 +5787,7 @@ int ethtool_plca_set_cfg(struct ynl_sock *ys,
 	if (req->_present.burst_tmr)
 		mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_BURST_TMR, req->burst_tmr);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -6124,6 +6140,7 @@ void ethtool_mm_set_req_free(struct ethtool_mm_set_req *req)
 
 int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -6143,7 +6160,7 @@ int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req)
 	if (req->_present.tx_min_frag_size)
 		mnl_attr_put_u32(nlh, ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, req->tx_min_frag_size);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
diff --git a/tools/net/ynl/generated/fou-user.c b/tools/net/ynl/generated/fou-user.c
index 4271b5d43c58..f30bef23bc31 100644
--- a/tools/net/ynl/generated/fou-user.c
+++ b/tools/net/ynl/generated/fou-user.c
@@ -72,6 +72,7 @@ void fou_add_req_free(struct fou_add_req *req)
 
 int fou_add(struct ynl_sock *ys, struct fou_add_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -99,7 +100,7 @@ int fou_add(struct ynl_sock *ys, struct fou_add_req *req)
 	if (req->_present.ifindex)
 		mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
@@ -117,6 +118,7 @@ void fou_del_req_free(struct fou_del_req *req)
 
 int fou_del(struct ynl_sock *ys, struct fou_del_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -140,7 +142,7 @@ int fou_del(struct ynl_sock *ys, struct fou_del_req *req)
 	if (req->_present.peer_v6_len)
 		mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
diff --git a/tools/net/ynl/generated/handshake-user.c b/tools/net/ynl/generated/handshake-user.c
index 7c67765daf90..6901f8462cca 100644
--- a/tools/net/ynl/generated/handshake-user.c
+++ b/tools/net/ynl/generated/handshake-user.c
@@ -295,6 +295,7 @@ void handshake_done_req_free(struct handshake_done_req *req)
 
 int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req)
 {
+	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -308,7 +309,7 @@ int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req)
 	for (unsigned int i = 0; i < req->n_remote_auth; i++)
 		mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_REMOTE_AUTH, req->remote_auth[i]);
 
-	err = ynl_exec(ys, nlh, NULL);
+	err = ynl_exec(ys, nlh, &yrs);
 	if (err < 0)
 		return -1;
 
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 3bd6b928c14f..8337aa6de25e 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -1709,14 +1709,14 @@ def print_req(ri):
     ret_ok = '0'
     ret_err = '-1'
     direction = "request"
-    local_vars = ['struct nlmsghdr *nlh;',
+    local_vars = ['struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };',
+                  'struct nlmsghdr *nlh;',
                   'int err;']
 
     if 'reply' in ri.op[ri.op_mode]:
         ret_ok = 'rsp'
         ret_err = 'NULL'
-        local_vars += [f'{type_name(ri, rdir(direction))} *rsp;',
-                       'struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };']
+        local_vars += [f'{type_name(ri, rdir(direction))} *rsp;']
 
     print_prototype(ri, direction, terminate=False)
     ri.cw.block_start()
@@ -1732,7 +1732,6 @@ def print_req(ri):
         attr.attr_put(ri, "req")
     ri.cw.nl()
 
-    parse_arg = "NULL"
     if 'reply' in ri.op[ri.op_mode]:
         ri.cw.p('rsp = calloc(1, sizeof(*rsp));')
         ri.cw.p('yrs.yarg.data = rsp;')
@@ -1742,8 +1741,7 @@ def print_req(ri):
         else:
             ri.cw.p(f'yrs.rsp_cmd = {ri.op.rsp_value};')
         ri.cw.nl()
-        parse_arg = '&yrs'
-    ri.cw.p(f"err = ynl_exec(ys, nlh, {parse_arg});")
+    ri.cw.p("err = ynl_exec(ys, nlh, &yrs);")
     ri.cw.p('if (err < 0)')
     if 'reply' in ri.op[ri.op_mode]:
         ri.cw.p('goto err_free;')
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 1da7f4b91b4f..dc42de1785ce 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,3 +1,5 @@
+arch/arm64/tools/gen-sysreg.awk
+arch/arm64/tools/sysreg
 tools/perf
 tools/arch
 tools/scripts
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index d88da787e815..058c9aecf608 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -434,6 +434,21 @@ export INSTALL SHELL_PATH
 
 SHELL = $(SHELL_PATH)
 
+arm64_gen_sysreg_dir := $(srctree)/tools/arch/arm64/tools
+ifneq ($(OUTPUT),)
+  arm64_gen_sysreg_outdir := $(OUTPUT)
+else
+  arm64_gen_sysreg_outdir := $(CURDIR)
+endif
+
+arm64-sysreg-defs: FORCE
+	$(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir)
+
+arm64-sysreg-defs-clean:
+	$(call QUIET_CLEAN,arm64-sysreg-defs)
+	$(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) \
+		clean > /dev/null
+
 beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/
 linux_uapi_dir := $(srctree)/tools/include/uapi/linux
 asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
@@ -450,15 +465,6 @@ drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_outdir)')
 
-arm64_gen_sysreg_dir := $(srctree)/tools/arch/arm64/tools
-
-arm64-sysreg-defs: FORCE
-	$(Q)$(MAKE) -C $(arm64_gen_sysreg_dir)
-
-arm64-sysreg-defs-clean:
-	$(call QUIET_CLEAN,arm64-sysreg-defs)
-	$(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) clean > /dev/null
-
 $(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl)
 	$(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@
 
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 80be0e98ea0c..116ff501bf92 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -367,3 +367,7 @@
 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	n64	cachestat			sys_cachestat
 452	n64	fchmodat2			sys_fchmodat2
+453	n64	map_shadow_stack		sys_map_shadow_stack
+454	n64	futex_wake			sys_futex_wake
+455	n64	futex_wait			sys_futex_wait
+456	n64	futex_requeue			sys_futex_requeue
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index e1412519b4ad..7fab411378f2 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -539,3 +539,7 @@
 450 	nospu	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	common	cachestat			sys_cachestat
 452	common	fchmodat2			sys_fchmodat2
+453	common	map_shadow_stack		sys_ni_syscall
+454	common	futex_wake			sys_futex_wake
+455	common	futex_wait			sys_futex_wait
+456	common	futex_requeue			sys_futex_requeue
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index cc0bc144b661..86fec9b080f6 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -455,3 +455,7 @@
 450  common	set_mempolicy_home_node	sys_set_mempolicy_home_node	sys_set_mempolicy_home_node
 451  common	cachestat		sys_cachestat			sys_cachestat
 452  common	fchmodat2		sys_fchmodat2			sys_fchmodat2
+453  common	map_shadow_stack	sys_map_shadow_stack		sys_map_shadow_stack
+454  common	futex_wake		sys_futex_wake			sys_futex_wake
+455  common	futex_wait		sys_futex_wait			sys_futex_wait
+456  common	futex_requeue		sys_futex_requeue		sys_futex_requeue
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 2a62eaf30d69..8cb8bf68721c 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -375,6 +375,9 @@
 451	common	cachestat		sys_cachestat
 452	common	fchmodat2		sys_fchmodat2
 453	64	map_shadow_stack	sys_map_shadow_stack
+454	common	futex_wake		sys_futex_wake
+455	common	futex_wait		sys_futex_wait
+456	common	futex_requeue		sys_futex_requeue
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
index f007a9b27065..0092b9b39611 100644
--- a/tools/perf/builtin-kwork.c
+++ b/tools/perf/builtin-kwork.c
@@ -1643,7 +1643,7 @@ static int top_print_work(struct perf_kwork *kwork __maybe_unused, struct kwork_
 	/*
 	 * pid
 	 */
-	ret += printf(" %*ld ", PRINT_PID_WIDTH, work->id);
+	ret += printf(" %*" PRIu64 " ", PRINT_PID_WIDTH, work->id);
 
 	/*
 	 * tgid
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index a343823c8ddf..61c2c96cc070 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -434,6 +434,11 @@ static void json_print_metric(void *ps __maybe_unused, const char *group,
 	strbuf_release(&buf);
 }
 
+static bool json_skip_duplicate_pmus(void *ps __maybe_unused)
+{
+	return false;
+}
+
 static bool default_skip_duplicate_pmus(void *ps)
 {
 	struct print_state *print_state = ps;
@@ -503,6 +508,7 @@ int cmd_list(int argc, const char **argv)
 			.print_end = json_print_end,
 			.print_event = json_print_event,
 			.print_metric = json_print_metric,
+			.skip_duplicate_pmus = json_skip_duplicate_pmus,
 		};
 		ps = &json_ps;
 	} else {
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json
index e2848a9d4848..afcdad58ef89 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json
@@ -231,6 +231,7 @@
         "MetricName": "slots_lost_misspeculation_fraction",
         "MetricExpr": "100 * ((OP_SPEC - OP_RETIRED) / (CPU_CYCLES * #slots))",
         "BriefDescription": "Fraction of slots lost due to misspeculation",
+        "DefaultMetricgroupName": "TopdownL1",
         "MetricGroup": "Default;TopdownL1",
         "ScaleUnit": "1percent of slots"
     },
@@ -238,6 +239,7 @@
         "MetricName": "retired_fraction",
         "MetricExpr": "100 * (OP_RETIRED / (CPU_CYCLES * #slots))",
         "BriefDescription": "Fraction of slots retiring, useful work",
+        "DefaultMetricgroupName": "TopdownL1",
         "MetricGroup": "Default;TopdownL1",
 	"ScaleUnit": "1percent of slots"
     },
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index 39b74d83c7c4..cfcb7e2c3813 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -383,6 +383,7 @@ struct ucred {
 #define SOL_MPTCP	284
 #define SOL_MCTP	285
 #define SOL_SMC		286
+#define SOL_VSOCK	287
 
 /* IPX options */
 #define IPX_TYPE	1
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index fb661c48992f..988473bf907a 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -347,7 +347,7 @@ CFLAGS_rbtree.o        += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET
 CFLAGS_libstring.o     += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 CFLAGS_hweight.o       += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 CFLAGS_header.o        += -include $(OUTPUT)PERF-VERSION-FILE
-CFLAGS_arm-spe.o       += -I$(srctree)/tools/arch/arm64/include/ -I$(srctree)/tools/arch/arm64/include/generated/
+CFLAGS_arm-spe.o       += -I$(srctree)/tools/arch/arm64/include/ -I$(OUTPUT)arch/arm64/include/generated/
 
 $(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE
 	$(call rule_mkdir)
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index e105245eb905..f1716c089c99 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -12,6 +12,7 @@
 #include <linux/zalloc.h>
 #include <linux/string.h>
 #include <bpf/bpf.h>
+#include <inttypes.h>
 
 #include "bpf_skel/lock_contention.skel.h"
 #include "bpf_skel/lock_data.h"
@@ -250,7 +251,7 @@ static const char *lock_contention_get_name(struct lock_contention *con,
 		if (cgrp)
 			return cgrp->name;
 
-		snprintf(name_buf, sizeof(name_buf), "cgroup:%lu", cgrp_id);
+		snprintf(name_buf, sizeof(name_buf), "cgroup:%" PRIu64 "", cgrp_id);
 		return name_buf;
 	}
 
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 0484736d9fe4..ca3e0404f187 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -225,7 +225,7 @@ static struct metric *metric__new(const struct pmu_metric *pm,
 
 	m->pmu = pm->pmu ?: "cpu";
 	m->metric_name = pm->metric_name;
-	m->default_metricgroup_name = pm->default_metricgroup_name;
+	m->default_metricgroup_name = pm->default_metricgroup_name ?: "";
 	m->modifier = NULL;
 	if (modifier) {
 		m->modifier = strdup(modifier);
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index fd26189d53be..b8419f460368 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -924,7 +924,7 @@ static __init int ndtest_init(void)
 
 	nfit_test_setup(ndtest_resource_lookup, NULL);
 
-	rc = class_regster(&ndtest_dimm_class);
+	rc = class_register(&ndtest_dimm_class);
 	if (rc)
 		goto err_register;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index a934d430c20c..a92807bfcd13 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1337,7 +1337,8 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
 }
 
 static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
-				     int sock_mapfd, int verd_mapfd, enum redir_mode mode)
+				     int sock_mapfd, int nop_mapfd,
+				     int verd_mapfd, enum redir_mode mode)
 {
 	const char *log_prefix = redir_mode_str(mode);
 	unsigned int pass;
@@ -1351,6 +1352,12 @@ static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
 	if (err)
 		return;
 
+	if (nop_mapfd >= 0) {
+		err = add_to_sockmap(nop_mapfd, cli0, cli1);
+		if (err)
+			return;
+	}
+
 	n = write(cli1, "a", 1);
 	if (n < 0)
 		FAIL_ERRNO("%s: write", log_prefix);
@@ -1387,7 +1394,7 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd,
 		goto close0;
 	c1 = sfd[0], p1 = sfd[1];
 
-	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
+	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
 
 	xclose(c1);
 	xclose(p1);
@@ -1677,7 +1684,7 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
 	if (err)
 		goto close_cli0;
 
-	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
+	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
 
 	xclose(c1);
 	xclose(p1);
@@ -1735,7 +1742,7 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
 	if (err)
 		goto close;
 
-	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
+	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
 
 	xclose(c1);
 	xclose(p1);
@@ -1770,8 +1777,10 @@ static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
 }
 
-static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
-					int verd_mapfd, enum redir_mode mode)
+static void unix_inet_redir_to_connected(int family, int type,
+					int sock_mapfd, int nop_mapfd,
+					int verd_mapfd,
+					enum redir_mode mode)
 {
 	int c0, c1, p0, p1;
 	int sfd[2];
@@ -1785,7 +1794,8 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
 		goto close_cli0;
 	c1 = sfd[0], p1 = sfd[1];
 
-	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
+	pairs_redir_to_connected(c0, p0, c1, p1,
+				 sock_mapfd, nop_mapfd, verd_mapfd, mode);
 
 	xclose(c1);
 	xclose(p1);
@@ -1799,6 +1809,7 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
 					    struct bpf_map *inner_map, int family)
 {
 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+	int nop_map = bpf_map__fd(skel->maps.nop_map);
 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
 	int sock_map = bpf_map__fd(inner_map);
 	int err;
@@ -1808,14 +1819,32 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
 		return;
 
 	skel->bss->test_ingress = false;
-	unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+	unix_inet_redir_to_connected(family, SOCK_DGRAM,
+				     sock_map, -1, verdict_map,
 				     REDIR_EGRESS);
-	unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+	unix_inet_redir_to_connected(family, SOCK_DGRAM,
+				     sock_map, -1, verdict_map,
+				     REDIR_EGRESS);
+
+	unix_inet_redir_to_connected(family, SOCK_DGRAM,
+				     sock_map, nop_map, verdict_map,
+				     REDIR_EGRESS);
+	unix_inet_redir_to_connected(family, SOCK_STREAM,
+				     sock_map, nop_map, verdict_map,
 				     REDIR_EGRESS);
 	skel->bss->test_ingress = true;
-	unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+	unix_inet_redir_to_connected(family, SOCK_DGRAM,
+				     sock_map, -1, verdict_map,
+				     REDIR_INGRESS);
+	unix_inet_redir_to_connected(family, SOCK_STREAM,
+				     sock_map, -1, verdict_map,
+				     REDIR_INGRESS);
+
+	unix_inet_redir_to_connected(family, SOCK_DGRAM,
+				     sock_map, nop_map, verdict_map,
 				     REDIR_INGRESS);
-	unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+	unix_inet_redir_to_connected(family, SOCK_STREAM,
+				     sock_map, nop_map, verdict_map,
 				     REDIR_INGRESS);
 
 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index fc6b2954e8f5..59993fc9c0d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1,6 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
 #include <test_progs.h>
 #include <network_helpers.h>
+#include "tailcall_poke.skel.h"
+
 
 /* test_tailcall_1 checks basic functionality by patching multiple locations
  * in a single program for a single tail call slot with nop->jmp, jmp->nop
@@ -1105,6 +1108,85 @@ out:
 	bpf_object__close(tgt_obj);
 }
 
+#define JMP_TABLE "/sys/fs/bpf/jmp_table"
+
+static int poke_thread_exit;
+
+static void *poke_update(void *arg)
+{
+	__u32 zero = 0, prog1_fd, prog2_fd, map_fd;
+	struct tailcall_poke *call = arg;
+
+	map_fd = bpf_map__fd(call->maps.jmp_table);
+	prog1_fd = bpf_program__fd(call->progs.call1);
+	prog2_fd = bpf_program__fd(call->progs.call2);
+
+	while (!poke_thread_exit) {
+		bpf_map_update_elem(map_fd, &zero, &prog1_fd, BPF_ANY);
+		bpf_map_update_elem(map_fd, &zero, &prog2_fd, BPF_ANY);
+	}
+
+	return NULL;
+}
+
+/*
+ * We are trying to hit prog array update during another program load
+ * that shares the same prog array map.
+ *
+ * For that we share the jmp_table map between two skeleton instances
+ * by pinning the jmp_table to same path. Then first skeleton instance
+ * periodically updates jmp_table in 'poke update' thread while we load
+ * the second skeleton instance in the main thread.
+ */
+static void test_tailcall_poke(void)
+{
+	struct tailcall_poke *call, *test;
+	int err, cnt = 10;
+	pthread_t thread;
+
+	unlink(JMP_TABLE);
+
+	call = tailcall_poke__open_and_load();
+	if (!ASSERT_OK_PTR(call, "tailcall_poke__open"))
+		return;
+
+	err = bpf_map__pin(call->maps.jmp_table, JMP_TABLE);
+	if (!ASSERT_OK(err, "bpf_map__pin"))
+		goto out;
+
+	err = pthread_create(&thread, NULL, poke_update, call);
+	if (!ASSERT_OK(err, "new toggler"))
+		goto out;
+
+	while (cnt--) {
+		test = tailcall_poke__open();
+		if (!ASSERT_OK_PTR(test, "tailcall_poke__open"))
+			break;
+
+		err = bpf_map__set_pin_path(test->maps.jmp_table, JMP_TABLE);
+		if (!ASSERT_OK(err, "bpf_map__pin")) {
+			tailcall_poke__destroy(test);
+			break;
+		}
+
+		bpf_program__set_autoload(test->progs.test, true);
+		bpf_program__set_autoload(test->progs.call1, false);
+		bpf_program__set_autoload(test->progs.call2, false);
+
+		err = tailcall_poke__load(test);
+		tailcall_poke__destroy(test);
+		if (!ASSERT_OK(err, "tailcall_poke__load"))
+			break;
+	}
+
+	poke_thread_exit = 1;
+	ASSERT_OK(pthread_join(thread, NULL), "pthread_join");
+
+out:
+	bpf_map__unpin(call->maps.jmp_table, JMP_TABLE);
+	tailcall_poke__destroy(call);
+}
+
 void test_tailcalls(void)
 {
 	if (test__start_subtest("tailcall_1"))
@@ -1139,4 +1221,6 @@ void test_tailcalls(void)
 		test_tailcall_bpf2bpf_fentry_fexit();
 	if (test__start_subtest("tailcall_bpf2bpf_fentry_entry"))
 		test_tailcall_bpf2bpf_fentry_entry();
+	if (test__start_subtest("tailcall_poke"))
+		test_tailcall_poke();
 }
diff --git a/tools/testing/selftests/bpf/progs/tailcall_poke.c b/tools/testing/selftests/bpf/progs/tailcall_poke.c
new file mode 100644
index 000000000000..c78b94b75e83
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_poke.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+SEC("?fentry/bpf_fentry_test1")
+int BPF_PROG(test, int a)
+{
+	bpf_tail_call_static(ctx, &jmp_table, 0);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(call1, int a)
+{
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(call2, int a)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index 464d35bd57c7..b7250eb9c30c 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -15,6 +15,13 @@ struct {
 } sock_map SEC(".maps");
 
 struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, __u64);
+} nop_map SEC(".maps");
+
+struct {
 	__uint(type, BPF_MAP_TYPE_SOCKHASH);
 	__uint(max_entries, 2);
 	__type(key, __u32);
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index 050e9751321c..ad9202335656 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -293,15 +293,13 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
 				    __u64 bitmap_size, __u32 flags,
 				    struct __test_metadata *_metadata)
 {
-	unsigned long i, count, nbits = bitmap_size * BITS_PER_BYTE;
+	unsigned long i, nbits = bitmap_size * BITS_PER_BYTE;
 	unsigned long nr = nbits / 2;
 	__u64 out_dirty = 0;
 
 	/* Mark all even bits as dirty in the mock domain */
-	for (count = 0, i = 0; i < nbits; count += !(i % 2), i++)
-		if (!(i % 2))
-			set_bit(i, (unsigned long *)bitmap);
-	ASSERT_EQ(nr, count);
+	for (i = 0; i < nbits; i += 2)
+		set_bit(i, (unsigned long *)bitmap);
 
 	test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size,
 				       bitmap, &out_dirty);
@@ -311,9 +309,10 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
 	memset(bitmap, 0, bitmap_size);
 	test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap,
 				  flags);
-	for (count = 0, i = 0; i < nbits; count += !(i % 2), i++)
+	/* Beware ASSERT_EQ() is two statements -- braces are not redundant! */
+	for (i = 0; i < nbits; i++) {
 		ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap));
-	ASSERT_EQ(count, out_dirty);
+	}
 
 	memset(bitmap, 0, bitmap_size);
 	test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap,
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index a5963ab9215b..963435959a92 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -18,12 +18,13 @@ else
 endif
 
 ifeq ($(ARCH),arm64)
-arm64_tools_dir := $(top_srcdir)/tools/arch/arm64/tools/
+tools_dir := $(top_srcdir)/tools
+arm64_tools_dir := $(tools_dir)/arch/arm64/tools/
 GEN_HDRS := $(top_srcdir)/tools/arch/arm64/include/generated/
 CFLAGS += -I$(GEN_HDRS)
 
 $(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*)
-	$(MAKE) -C $(arm64_tools_dir)
+	$(MAKE) -C $(arm64_tools_dir) O=$(tools_dir)
 endif
 
 LIBKVM += lib/assert.c
@@ -223,7 +224,7 @@ else
 LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
 endif
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-	-Wno-gnu-variable-sized-type-not-at-end -MD\
+	-Wno-gnu-variable-sized-type-not-at-end -MD -MP \
 	-fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
 	-fno-builtin-strnlen \
 	-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index 18ac5c1952a3..83e25bccc139 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -259,7 +259,7 @@ int main(int argc, char **argv)
 	__TEST_REQUIRE(token == MAGIC_TOKEN,
 		       "This test must be run with the magic token %d.\n"
 		       "This is done by nx_huge_pages_test.sh, which\n"
-		       "also handles environment setup for the test.");
+		       "also handles environment setup for the test.", MAGIC_TOKEN);
 
 	run_test(reclaim_period_ms, false, reboot_permissions);
 	run_test(reclaim_period_ms, true, reboot_permissions);
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 78dfec8bc676..dede0bcf97a3 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -60,7 +60,7 @@ TEST_GEN_FILES += mrelease_test
 TEST_GEN_FILES += mremap_dontunmap
 TEST_GEN_FILES += mremap_test
 TEST_GEN_FILES += on-fault-limit
-TEST_GEN_PROGS += pagemap_ioctl
+TEST_GEN_FILES += pagemap_ioctl
 TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
 TEST_GEN_FILES += uffd-stress
@@ -72,7 +72,7 @@ TEST_GEN_FILES += mdwe_test
 TEST_GEN_FILES += hugetlb_fault_after_madv
 
 ifneq ($(ARCH),arm64)
-TEST_GEN_PROGS += soft-dirty
+TEST_GEN_FILES += soft-dirty
 endif
 
 ifeq ($(ARCH),x86_64)
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index befab43719ba..d59517ed3d48 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -36,6 +36,7 @@ int pagemap_fd;
 int uffd;
 int page_size;
 int hpage_size;
+const char *progname;
 
 #define LEN(region)	((region.end - region.start)/page_size)
 
@@ -1149,11 +1150,11 @@ int sanity_tests(void)
 	munmap(mem, mem_size);
 
 	/* 9. Memory mapped file */
-	fd = open(__FILE__, O_RDONLY);
+	fd = open(progname, O_RDONLY);
 	if (fd < 0)
 		ksft_exit_fail_msg("%s Memory mapped file\n", __func__);
 
-	ret = stat(__FILE__, &sbuf);
+	ret = stat(progname, &sbuf);
 	if (ret < 0)
 		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
 
@@ -1472,12 +1473,14 @@ static void transact_test(int page_size)
 			      extra_thread_faults);
 }
 
-int main(void)
+int main(int argc, char *argv[])
 {
 	int mem_size, shmid, buf_size, fd, i, ret;
 	char *mem, *map, *fmem;
 	struct stat sbuf;
 
+	progname = argv[0];
+
 	ksft_print_header();
 
 	if (init_uffd())
diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c
index 5b88f7129fea..79a3dd75590e 100644
--- a/tools/testing/selftests/net/af_unix/diag_uid.c
+++ b/tools/testing/selftests/net/af_unix/diag_uid.c
@@ -148,7 +148,6 @@ void receive_response(struct __test_metadata *_metadata,
 		.msg_iov = &iov,
 		.msg_iovlen = 1
 	};
-	struct unix_diag_req *udr;
 	struct nlmsghdr *nlh;
 	int ret;
 
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 24b21b15ed3f..6ff3e732f449 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -416,9 +416,9 @@ int main(int argc, char *argv[])
 {
 	struct addrinfo hints, *ai;
 	struct iovec iov[1];
+	unsigned char *buf;
 	struct msghdr msg;
 	char cbuf[1024];
-	char *buf;
 	int err;
 	int fd;
 
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index 9a8229abfa02..be4a30a0d02a 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -2263,7 +2263,7 @@ static int check_results(void)
 
 int main(int argc, char **argv)
 {
-	unsigned int nr_process = 1;
+	long nr_process = 1;
 	int route_sock = -1, ret = KSFT_SKIP;
 	int test_desc_fd[2];
 	uint32_t route_seq;
@@ -2284,7 +2284,7 @@ int main(int argc, char **argv)
 			exit_usage(argv);
 		}
 
-		if (nr_process > MAX_PROCESSES || !nr_process) {
+		if (nr_process > MAX_PROCESSES || nr_process < 1) {
 			printk("nr_process should be between [1; %u]",
 					MAX_PROCESSES);
 			exit_usage(argv);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index c7f9ebeebc2c..d2043ec3bf6d 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -18,6 +18,7 @@
 
 #include <sys/ioctl.h>
 #include <sys/poll.h>
+#include <sys/random.h>
 #include <sys/sendfile.h>
 #include <sys/stat.h>
 #include <sys/socket.h>
@@ -1125,15 +1126,11 @@ again:
 
 static void init_rng(void)
 {
-	int fd = open("/dev/urandom", O_RDONLY);
 	unsigned int foo;
 
-	if (fd > 0) {
-		int ret = read(fd, &foo, sizeof(foo));
-
-		if (ret < 0)
-			srand(fd + foo);
-		close(fd);
+	if (getrandom(&foo, sizeof(foo), 0) == -1) {
+		perror("getrandom");
+		exit(1);
 	}
 
 	srand(foo);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
index 8672d898f8cd..218aac467321 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -18,6 +18,7 @@
 #include <time.h>
 
 #include <sys/ioctl.h>
+#include <sys/random.h>
 #include <sys/socket.h>
 #include <sys/types.h>
 #include <sys/wait.h>
@@ -519,15 +520,11 @@ static int client(int unixfd)
 
 static void init_rng(void)
 {
-	int fd = open("/dev/urandom", O_RDONLY);
 	unsigned int foo;
 
-	if (fd > 0) {
-		int ret = read(fd, &foo, sizeof(foo));
-
-		if (ret < 0)
-			srand(fd + foo);
-		close(fd);
+	if (getrandom(&foo, sizeof(foo), 0) == -1) {
+		perror("getrandom");
+		exit(1);
 	}
 
 	srand(foo);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 486800a7024b..3b1b9e8dd70c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -115,8 +115,6 @@ EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
 
 static const struct file_operations stat_fops_per_vm;
 
-static struct file_operations kvm_chardev_ops;
-
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 			   unsigned long arg);
 #ifdef CONFIG_KVM_COMPAT
@@ -1157,9 +1155,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
-	/* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */
-	__module_get(kvm_chardev_ops.owner);
-
 	KVM_MMU_LOCK_INIT(kvm);
 	mmgrab(current->mm);
 	kvm->mm = current->mm;
@@ -1279,7 +1274,6 @@ out_err_no_irq_srcu:
 out_err_no_srcu:
 	kvm_arch_free_vm(kvm);
 	mmdrop(current->mm);
-	module_put(kvm_chardev_ops.owner);
 	return ERR_PTR(r);
 }
 
@@ -1348,7 +1342,6 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	preempt_notifier_dec();
 	hardware_disable_all();
 	mmdrop(mm);
-	module_put(kvm_chardev_ops.owner);
 }
 
 void kvm_get_kvm(struct kvm *kvm)
@@ -3887,7 +3880,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static const struct file_operations kvm_vcpu_fops = {
+static struct file_operations kvm_vcpu_fops = {
 	.release        = kvm_vcpu_release,
 	.unlocked_ioctl = kvm_vcpu_ioctl,
 	.mmap           = kvm_vcpu_mmap,
@@ -4081,6 +4074,7 @@ static int kvm_vcpu_stats_release(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations kvm_vcpu_stats_fops = {
+	.owner = THIS_MODULE,
 	.read = kvm_vcpu_stats_read,
 	.release = kvm_vcpu_stats_release,
 	.llseek = noop_llseek,
@@ -4431,7 +4425,7 @@ static int kvm_device_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static const struct file_operations kvm_device_fops = {
+static struct file_operations kvm_device_fops = {
 	.unlocked_ioctl = kvm_device_ioctl,
 	.release = kvm_device_release,
 	KVM_COMPAT(kvm_device_ioctl),
@@ -4759,6 +4753,7 @@ static int kvm_vm_stats_release(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations kvm_vm_stats_fops = {
+	.owner = THIS_MODULE,
 	.read = kvm_vm_stats_read,
 	.release = kvm_vm_stats_release,
 	.llseek = noop_llseek,
@@ -5060,7 +5055,7 @@ static long kvm_vm_compat_ioctl(struct file *filp,
 }
 #endif
 
-static const struct file_operations kvm_vm_fops = {
+static struct file_operations kvm_vm_fops = {
 	.release        = kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
 	.llseek		= noop_llseek,
@@ -6095,6 +6090,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
 		goto err_async_pf;
 
 	kvm_chardev_ops.owner = module;
+	kvm_vm_fops.owner = module;
+	kvm_vcpu_fops.owner = module;
+	kvm_device_fops.owner = module;
 
 	kvm_preempt_ops.sched_in = kvm_sched_in;
 	kvm_preempt_ops.sched_out = kvm_sched_out;